#ifndef CUFFTDX_FFT_2187_FP32_FWD_PTX_HPP
#define CUFFTDX_FFT_2187_FP32_FWD_PTX_HPP



template<> __forceinline__ __device__ void cufftdx_private_function<145, float, 1>(cufftdx::detail::complex<float> *rmem, unsigned smem){

asm volatile (R"({
.reg .f32 f<2481>;
.reg .b32 r<25>;
.reg .b64 rd<16>;
mov.u32 r23, %tid.y;
mov.u32 r24, %54;
mad.lo.s32 r3, r23, 17496, r24;
add.f32 f109, %75, %93;
add.f32 f110, %57, f109;
mul.f32 f113, f109, 0f3F000000;
sub.f32 f114, %57, f113;
add.f32 f2480, %76, %94;
sub.f32 f115, %76, %94;
mul.f32 f116, f115, 0f3F5DB3D7;
add.f32 f117, f116, f114;
sub.f32 f118, f114, f116;
add.f32 f2479, %58, f2480;
mul.f32 f119, f2480, 0f3F000000;
sub.f32 f120, %58, f119;
sub.f32 f121, %75, %93;
mul.f32 f122, f121, 0f3F5DB3D7;
sub.f32 f123, f120, f122;
add.f32 f124, f122, f120;
add.f32 f125, %81, %99;
add.f32 f126, %63, f125;
mul.f32 f129, f125, 0f3F000000;
sub.f32 f130, %63, f129;
add.f32 f2478, %82, %100;
sub.f32 f131, %82, %100;
mul.f32 f132, f131, 0f3F5DB3D7;
add.f32 f133, f132, f130;
sub.f32 f134, f130, f132;
add.f32 f2477, %64, f2478;
mul.f32 f135, f2478, 0f3F000000;
sub.f32 f136, %64, f135;
sub.f32 f137, %81, %99;
mul.f32 f138, f137, 0f3F5DB3D7;
sub.f32 f139, f136, f138;
add.f32 f140, f138, f136;
add.f32 f141, %87, %105;
add.f32 f142, %69, f141;
mul.f32 f145, f141, 0f3F000000;
sub.f32 f146, %69, f145;
add.f32 f2476, %88, %106;
sub.f32 f147, %88, %106;
mul.f32 f148, f147, 0f3F5DB3D7;
add.f32 f149, f148, f146;
sub.f32 f150, f146, f148;
add.f32 f2475, %70, f2476;
mul.f32 f151, f2476, 0f3F000000;
sub.f32 f152, %70, f151;
sub.f32 f153, %87, %105;
mul.f32 f154, f153, 0f3F5DB3D7;
sub.f32 f155, f152, f154;
add.f32 f156, f154, f152;
mul.f32 f158, f139, 0fBF248DBB;
mul.f32 f2474, f133, 0f3F441B7D;
sub.f32 f159, f2474, f158;
mul.f32 f160, f139, 0f3F441B7D;
fma.rn.f32 f161, f133, 0fBF248DBB, f160;
mul.f32 f2472, f149, 0f3E31D0D4;
mul.f32 f2473, f155, 0fBF7C1C5C;
sub.f32 f164, f2472, f2473;
mul.f32 f165, f155, 0f3E31D0D4;
fma.rn.f32 f166, f149, 0fBF7C1C5C, f165;
mul.f32 f2470, f134, 0f3E31D0D4;
mul.f32 f2471, f140, 0fBF7C1C5C;
sub.f32 f169, f2470, f2471;
mul.f32 f170, f140, 0f3E31D0D4;
fma.rn.f32 f171, f134, 0fBF7C1C5C, f170;
mul.f32 f2468, f150, 0fBF708FB2;
mul.f32 f2469, f156, 0fBEAF1D44;
sub.f32 f174, f2468, f2469;
mul.f32 f175, f156, 0fBF708FB2;
fma.rn.f32 f176, f150, 0fBEAF1D44, f175;
add.f32 f177, f126, f142;
add.f32 f178, f110, f177;
mul.f32 f181, f177, 0f3F000000;
sub.f32 f182, f110, f181;
add.f32 f2467, f2477, f2475;
sub.f32 f183, f2477, f2475;
mul.f32 f184, f183, 0f3F5DB3D7;
add.f32 f185, f184, f182;
sub.f32 f186, f182, f184;
add.f32 f2466, f2479, f2467;
mul.f32 f187, f2467, 0f3F000000;
sub.f32 f188, f2479, f187;
sub.f32 f189, f126, f142;
mul.f32 f190, f189, 0f3F5DB3D7;
sub.f32 f191, f188, f190;
add.f32 f192, f190, f188;
add.f32 f193, f159, f164;
add.f32 f194, f117, f193;
mul.f32 f197, f193, 0f3F000000;
sub.f32 f198, f117, f197;
add.f32 f2465, f161, f166;
sub.f32 f199, f161, f166;
mul.f32 f200, f199, 0f3F5DB3D7;
add.f32 f201, f200, f198;
sub.f32 f202, f198, f200;
add.f32 f2464, f123, f2465;
mul.f32 f203, f2465, 0f3F000000;
sub.f32 f204, f123, f203;
sub.f32 f205, f159, f164;
mul.f32 f206, f205, 0f3F5DB3D7;
sub.f32 f207, f204, f206;
add.f32 f208, f206, f204;
add.f32 f209, f169, f174;
add.f32 f210, f118, f209;
mul.f32 f213, f209, 0f3F000000;
sub.f32 f214, f118, f213;
add.f32 f2463, f171, f176;
sub.f32 f215, f171, f176;
mul.f32 f216, f215, 0f3F5DB3D7;
add.f32 f217, f216, f214;
sub.f32 f218, f214, f216;
add.f32 f2462, f124, f2463;
mul.f32 f219, f2463, 0f3F000000;
sub.f32 f220, f124, f219;
sub.f32 f221, f169, f174;
mul.f32 f222, f221, 0f3F5DB3D7;
sub.f32 f223, f220, f222;
add.f32 f224, f222, f220;
add.f32 f225, %77, %95;
add.f32 f226, %59, f225;
mul.f32 f229, f225, 0f3F000000;
sub.f32 f230, %59, f229;
add.f32 f2459, %111, %112;
sub.f32 f231, %111, %112;
mul.f32 f232, f231, 0f3F5DB3D7;
add.f32 f233, f232, f230;
sub.f32 f234, f230, f232;
add.f32 f2457, %113, f2459;
mul.f32 f235, f2459, 0f3F000000;
sub.f32 f236, %113, f235;
sub.f32 f237, %77, %95;
mul.f32 f238, f237, 0f3F5DB3D7;
sub.f32 f239, f236, f238;
add.f32 f240, f238, f236;
add.f32 f241, %83, %101;
add.f32 f242, %65, f241;
mul.f32 f245, f241, 0f3F000000;
sub.f32 f246, %65, f245;
add.f32 f2454, %115, %114;
sub.f32 f247, %115, %114;
mul.f32 f248, f247, 0f3F5DB3D7;
add.f32 f249, f248, f246;
sub.f32 f250, f246, f248;
add.f32 f2452, %116, f2454;
mul.f32 f251, f2454, 0f3F000000;
sub.f32 f252, %116, f251;
sub.f32 f253, %83, %101;
mul.f32 f254, f253, 0f3F5DB3D7;
sub.f32 f255, f252, f254;
add.f32 f256, f254, f252;
add.f32 f257, %89, %107;
add.f32 f258, %71, f257;
mul.f32 f261, f257, 0f3F000000;
sub.f32 f262, %71, f261;
add.f32 f2449, %117, %118;
sub.f32 f263, %117, %118;
mul.f32 f264, f263, 0f3F5DB3D7;
add.f32 f265, f264, f262;
sub.f32 f266, f262, f264;
add.f32 f2447, %119, f2449;
mul.f32 f267, f2449, 0f3F000000;
sub.f32 f268, %119, f267;
sub.f32 f269, %89, %107;
mul.f32 f270, f269, 0f3F5DB3D7;
sub.f32 f271, f268, f270;
add.f32 f272, f270, f268;
mul.f32 f274, f255, 0fBF248DBB;
mul.f32 f2446, f249, 0f3F441B7D;
sub.f32 f275, f2446, f274;
mul.f32 f276, f255, 0f3F441B7D;
fma.rn.f32 f277, f249, 0fBF248DBB, f276;
mul.f32 f279, f271, 0fBF7C1C5C;
mul.f32 f2445, f265, 0f3E31D0D4;
sub.f32 f280, f2445, f279;
mul.f32 f281, f271, 0f3E31D0D4;
fma.rn.f32 f282, f265, 0fBF7C1C5C, f281;
mul.f32 f2443, f250, 0f3E31D0D4;
mul.f32 f2444, f256, 0fBF7C1C5C;
sub.f32 f285, f2443, f2444;
mul.f32 f286, f256, 0f3E31D0D4;
fma.rn.f32 f287, f250, 0fBF7C1C5C, f286;
mul.f32 f2441, f266, 0fBF708FB2;
mul.f32 f2442, f272, 0fBEAF1D44;
sub.f32 f290, f2441, f2442;
mul.f32 f291, f272, 0fBF708FB2;
fma.rn.f32 f292, f266, 0fBEAF1D44, f291;
add.f32 f293, f242, f258;
add.f32 f294, f226, f293;
mul.f32 f297, f293, 0f3F000000;
sub.f32 f298, f226, f297;
add.f32 f2440, f2452, f2447;
sub.f32 f299, f2452, f2447;
mul.f32 f300, f299, 0f3F5DB3D7;
add.f32 f301, f300, f298;
sub.f32 f302, f298, f300;
add.f32 f2439, f2457, f2440;
mul.f32 f303, f2440, 0f3F000000;
sub.f32 f304, f2457, f303;
sub.f32 f305, f242, f258;
mul.f32 f306, f305, 0f3F5DB3D7;
sub.f32 f307, f304, f306;
add.f32 f308, f306, f304;
add.f32 f309, f275, f280;
add.f32 f310, f233, f309;
mul.f32 f313, f309, 0f3F000000;
sub.f32 f314, f233, f313;
add.f32 f2438, f277, f282;
sub.f32 f315, f277, f282;
mul.f32 f316, f315, 0f3F5DB3D7;
add.f32 f317, f316, f314;
sub.f32 f318, f314, f316;
add.f32 f2437, f239, f2438;
mul.f32 f319, f2438, 0f3F000000;
sub.f32 f320, f239, f319;
sub.f32 f321, f275, f280;
mul.f32 f322, f321, 0f3F5DB3D7;
sub.f32 f323, f320, f322;
add.f32 f324, f322, f320;
add.f32 f325, f285, f290;
add.f32 f326, f234, f325;
mul.f32 f329, f325, 0f3F000000;
sub.f32 f330, f234, f329;
add.f32 f2436, f287, f292;
sub.f32 f331, f287, f292;
mul.f32 f332, f331, 0f3F5DB3D7;
add.f32 f333, f332, f330;
sub.f32 f334, f330, f332;
add.f32 f2435, f240, f2436;
mul.f32 f335, f2436, 0f3F000000;
sub.f32 f336, f240, f335;
sub.f32 f337, f285, f290;
mul.f32 f338, f337, 0f3F5DB3D7;
sub.f32 f339, f336, f338;
add.f32 f340, f338, f336;
add.f32 f341, %79, %97;
add.f32 f342, %61, f341;
mul.f32 f345, f341, 0f3F000000;
sub.f32 f346, %61, f345;
add.f32 f2432, %120, %121;
sub.f32 f347, %120, %121;
mul.f32 f348, f347, 0f3F5DB3D7;
add.f32 f349, f348, f346;
sub.f32 f350, f346, f348;
add.f32 f2430, %122, f2432;
mul.f32 f351, f2432, 0f3F000000;
sub.f32 f352, %122, f351;
sub.f32 f353, %79, %97;
mul.f32 f354, f353, 0f3F5DB3D7;
sub.f32 f355, f352, f354;
add.f32 f356, f354, f352;
add.f32 f357, %85, %103;
add.f32 f358, %67, f357;
mul.f32 f361, f357, 0f3F000000;
sub.f32 f362, %67, f361;
add.f32 f2427, %124, %123;
sub.f32 f363, %124, %123;
mul.f32 f364, f363, 0f3F5DB3D7;
add.f32 f365, f364, f362;
sub.f32 f366, f362, f364;
add.f32 f2425, %125, f2427;
mul.f32 f367, f2427, 0f3F000000;
sub.f32 f368, %125, f367;
sub.f32 f369, %85, %103;
mul.f32 f370, f369, 0f3F5DB3D7;
sub.f32 f371, f368, f370;
add.f32 f372, f370, f368;
add.f32 f373, %91, %109;
add.f32 f374, %73, f373;
mul.f32 f377, f373, 0f3F000000;
sub.f32 f378, %73, f377;
add.f32 f2423, %126, %110;
sub.f32 f379, %126, %110;
mul.f32 f380, f379, 0f3F5DB3D7;
add.f32 f381, f380, f378;
sub.f32 f382, f378, f380;
add.f32 f2421, %127, f2423;
mul.f32 f383, f2423, 0f3F000000;
sub.f32 f384, %127, f383;
sub.f32 f385, %91, %109;
mul.f32 f386, f385, 0f3F5DB3D7;
sub.f32 f387, f384, f386;
add.f32 f388, f386, f384;
mul.f32 f390, f371, 0fBF248DBB;
mul.f32 f2420, f365, 0f3F441B7D;
sub.f32 f391, f2420, f390;
mul.f32 f392, f371, 0f3F441B7D;
fma.rn.f32 f393, f365, 0fBF248DBB, f392;
mul.f32 f395, f387, 0fBF7C1C5C;
mul.f32 f2419, f381, 0f3E31D0D4;
sub.f32 f396, f2419, f395;
mul.f32 f397, f387, 0f3E31D0D4;
fma.rn.f32 f398, f381, 0fBF7C1C5C, f397;
mul.f32 f2417, f366, 0f3E31D0D4;
mul.f32 f2418, f372, 0fBF7C1C5C;
sub.f32 f401, f2417, f2418;
mul.f32 f402, f372, 0f3E31D0D4;
fma.rn.f32 f403, f366, 0fBF7C1C5C, f402;
mul.f32 f2415, f382, 0fBF708FB2;
mul.f32 f2416, f388, 0fBEAF1D44;
sub.f32 f406, f2415, f2416;
mul.f32 f407, f388, 0fBF708FB2;
fma.rn.f32 f408, f382, 0fBEAF1D44, f407;
add.f32 f409, f358, f374;
add.f32 f410, f342, f409;
mul.f32 f413, f409, 0f3F000000;
sub.f32 f414, f342, f413;
add.f32 f2414, f2425, f2421;
sub.f32 f415, f2425, f2421;
mul.f32 f416, f415, 0f3F5DB3D7;
add.f32 f417, f416, f414;
sub.f32 f418, f414, f416;
add.f32 f2413, f2430, f2414;
mul.f32 f419, f2414, 0f3F000000;
sub.f32 f420, f2430, f419;
sub.f32 f421, f358, f374;
mul.f32 f422, f421, 0f3F5DB3D7;
sub.f32 f423, f420, f422;
add.f32 f424, f422, f420;
add.f32 f425, f391, f396;
add.f32 f426, f349, f425;
mul.f32 f429, f425, 0f3F000000;
sub.f32 f430, f349, f429;
add.f32 f2412, f393, f398;
sub.f32 f431, f393, f398;
mul.f32 f432, f431, 0f3F5DB3D7;
add.f32 f433, f432, f430;
sub.f32 f434, f430, f432;
add.f32 f2411, f355, f2412;
mul.f32 f435, f2412, 0f3F000000;
sub.f32 f436, f355, f435;
sub.f32 f437, f391, f396;
mul.f32 f438, f437, 0f3F5DB3D7;
sub.f32 f439, f436, f438;
add.f32 f440, f438, f436;
add.f32 f441, f401, f406;
add.f32 f442, f350, f441;
mul.f32 f445, f441, 0f3F000000;
sub.f32 f446, f350, f445;
add.f32 f2410, f403, f408;
sub.f32 f447, f403, f408;
mul.f32 f448, f447, 0f3F5DB3D7;
add.f32 f449, f448, f446;
sub.f32 f450, f446, f448;
add.f32 f2409, f356, f2410;
mul.f32 f451, f2410, 0f3F000000;
sub.f32 f452, f356, f451;
sub.f32 f453, f401, f406;
mul.f32 f454, f453, 0f3F5DB3D7;
sub.f32 f455, f452, f454;
add.f32 f456, f454, f452;
mul.f32 f458, f2437, 0fBE6C2691;
mul.f32 f2408, f310, 0f3F791978;
sub.f32 f459, f2408, f458;
mul.f32 f460, f2437, 0f3F791978;
fma.rn.f32 f461, f310, 0fBE6C2691, f460;
mul.f32 f2406, f426, 0f3F64C51C;
mul.f32 f2407, f2411, 0fBEE5C902;
sub.f32 f464, f2406, f2407;
mul.f32 f465, f2411, 0f3F64C51C;
fma.rn.f32 f466, f426, 0fBEE5C902, f465;
mul.f32 f2404, f326, 0f3F64C51C;
mul.f32 f2405, f2435, 0fBEE5C902;
sub.f32 f469, f2404, f2405;
mul.f32 f470, f2435, 0f3F64C51C;
fma.rn.f32 f471, f326, 0fBEE5C902, f470;
mul.f32 f2402, f442, 0f3F18DF63;
mul.f32 f2403, f2409, 0fBF4D57F2;
sub.f32 f474, f2402, f2403;
mul.f32 f475, f2409, 0f3F18DF63;
fma.rn.f32 f476, f442, 0fBF4D57F2, f475;
mul.f32 f2400, f301, 0f3F441B7D;
mul.f32 f2401, f307, 0fBF248DBB;
sub.f32 f479, f2400, f2401;
mul.f32 f480, f307, 0f3F441B7D;
fma.rn.f32 f481, f301, 0fBF248DBB, f480;
mul.f32 f483, f423, 0fBF7C1C5C;
mul.f32 f2399, f417, 0f3E31D0D4;
sub.f32 f484, f2399, f483;
mul.f32 f485, f423, 0f3E31D0D4;
fma.rn.f32 f486, f417, 0fBF7C1C5C, f485;
mul.f32 f488, f323, 0fBF4D57F2;
mul.f32 f2398, f317, 0f3F18DF63;
sub.f32 f489, f2398, f488;
mul.f32 f490, f323, 0f3F18DF63;
fma.rn.f32 f491, f317, 0fBF4D57F2, f490;
mul.f32 f493, f439, 0fBF753ECD;
mul.f32 f2397, f433, 0fBE92D7E0;
sub.f32 f494, f2397, f493;
mul.f32 f495, f439, 0fBE92D7E0;
fma.rn.f32 f496, f433, 0fBF753ECD, f495;
mul.f32 f498, f339, 0fBF6B1036;
mul.f32 f2396, f333, 0f3ECACAF8;
sub.f32 f499, f2396, f498;
mul.f32 f500, f339, 0f3ECACAF8;
fma.rn.f32 f501, f333, 0fBF6B1036, f500;
mul.f32 f503, f455, 0fBF3A3529;
mul.f32 f2395, f449, 0fBF2FAD88;
sub.f32 f504, f2395, f503;
mul.f32 f505, f455, 0fBF2FAD88;
fma.rn.f32 f506, f449, 0fBF3A3529, f505;
mul.f32 f508, f308, 0fBF7C1C5C;
mul.f32 f2394, f302, 0f3E31D0D4;
sub.f32 f509, f2394, f508;
mul.f32 f510, f308, 0f3E31D0D4;
fma.rn.f32 f511, f302, 0fBF7C1C5C, f510;
mul.f32 f2392, f418, 0fBF708FB2;
mul.f32 f2393, f424, 0fBEAF1D44;
sub.f32 f514, f2392, f2393;
mul.f32 f515, f424, 0fBF708FB2;
fma.rn.f32 f516, f418, 0fBEAF1D44, f515;
mul.f32 f2390, f318, 0fBD6E2946;
mul.f32 f2391, f324, 0fBF7F9120;
sub.f32 f519, f2390, f2391;
mul.f32 f520, f324, 0fBD6E2946;
fma.rn.f32 f521, f318, 0fBF7F9120, f520;
mul.f32 f2388, f434, 0fBF7E44DE;
mul.f32 f2389, f440, 0f3DEDC21F;
sub.f32 f524, f2388, f2389;
mul.f32 f525, f440, 0fBF7E44DE;
fma.rn.f32 f526, f434, 0f3DEDC21F, f525;
mul.f32 f528, f340, 0fBF753ECD;
mul.f32 f2387, f334, 0fBE92D7E0;
sub.f32 f529, f2387, f528;
mul.f32 f530, f340, 0fBE92D7E0;
fma.rn.f32 f531, f334, 0fBF753ECD, f530;
mul.f32 f533, f456, 0f3F0CAC9F;
mul.f32 f2386, f450, 0fBF55E287;
sub.f32 f534, f2386, f533;
mul.f32 f535, f456, 0fBF55E287;
fma.rn.f32 f536, f450, 0f3F0CAC9F, f535;
add.f32 f537, f294, f410;
mul.f32 f539, f537, 0f3F000000;
sub.f32 f540, f178, f539;
add.f32 f2385, f2439, f2413;
sub.f32 f541, f2439, f2413;
mul.f32 f542, f541, 0f3F5DB3D7;
add.f32 f543, f542, f540;
sub.f32 f544, f540, f542;
mul.f32 f545, f2385, 0f3F000000;
sub.f32 f546, f2466, f545;
sub.f32 f547, f294, f410;
mul.f32 f548, f547, 0f3F5DB3D7;
sub.f32 f549, f546, f548;
add.f32 f550, f548, f546;
add.f32 f551, f459, f464;
add.f32 f552, f194, f551;
mul.f32 f555, f551, 0f3F000000;
sub.f32 f556, f194, f555;
add.f32 f2384, f461, f466;
sub.f32 f557, f461, f466;
mul.f32 f558, f557, 0f3F5DB3D7;
add.f32 f559, f558, f556;
sub.f32 f560, f556, f558;
add.f32 f2383, f2464, f2384;
mul.f32 f561, f2384, 0f3F000000;
sub.f32 f562, f2464, f561;
sub.f32 f563, f459, f464;
mul.f32 f564, f563, 0f3F5DB3D7;
sub.f32 f565, f562, f564;
add.f32 f566, f564, f562;
add.f32 f567, f469, f474;
add.f32 f568, f210, f567;
mul.f32 f571, f567, 0f3F000000;
sub.f32 f572, f210, f571;
add.f32 f2382, f471, f476;
sub.f32 f573, f471, f476;
mul.f32 f574, f573, 0f3F5DB3D7;
add.f32 f575, f574, f572;
sub.f32 f576, f572, f574;
add.f32 f2381, f2462, f2382;
mul.f32 f577, f2382, 0f3F000000;
sub.f32 f578, f2462, f577;
sub.f32 f579, f469, f474;
mul.f32 f580, f579, 0f3F5DB3D7;
sub.f32 f581, f578, f580;
add.f32 f582, f580, f578;
add.f32 f583, f479, f484;
add.f32 f584, f185, f583;
mul.f32 f587, f583, 0f3F000000;
sub.f32 f588, f185, f587;
add.f32 f2380, f481, f486;
sub.f32 f589, f481, f486;
mul.f32 f590, f589, 0f3F5DB3D7;
add.f32 f591, f590, f588;
sub.f32 f592, f588, f590;
add.f32 f2379, f191, f2380;
mul.f32 f593, f2380, 0f3F000000;
sub.f32 f594, f191, f593;
sub.f32 f595, f479, f484;
mul.f32 f596, f595, 0f3F5DB3D7;
sub.f32 f597, f594, f596;
add.f32 f598, f596, f594;
add.f32 f599, f489, f494;
add.f32 f600, f201, f599;
mul.f32 f603, f599, 0f3F000000;
sub.f32 f604, f201, f603;
add.f32 f2378, f491, f496;
sub.f32 f605, f491, f496;
mul.f32 f606, f605, 0f3F5DB3D7;
add.f32 f607, f606, f604;
sub.f32 f608, f604, f606;
add.f32 f2377, f207, f2378;
mul.f32 f609, f2378, 0f3F000000;
sub.f32 f610, f207, f609;
sub.f32 f611, f489, f494;
mul.f32 f612, f611, 0f3F5DB3D7;
sub.f32 f613, f610, f612;
add.f32 f614, f612, f610;
add.f32 f615, f499, f504;
add.f32 f616, f217, f615;
mul.f32 f619, f615, 0f3F000000;
sub.f32 f620, f217, f619;
add.f32 f2376, f501, f506;
sub.f32 f621, f501, f506;
mul.f32 f622, f621, 0f3F5DB3D7;
add.f32 f623, f622, f620;
sub.f32 f624, f620, f622;
add.f32 f2375, f223, f2376;
mul.f32 f625, f2376, 0f3F000000;
sub.f32 f626, f223, f625;
sub.f32 f627, f499, f504;
mul.f32 f628, f627, 0f3F5DB3D7;
sub.f32 f629, f626, f628;
add.f32 f630, f628, f626;
add.f32 f631, f509, f514;
add.f32 f632, f186, f631;
mul.f32 f635, f631, 0f3F000000;
sub.f32 f636, f186, f635;
add.f32 f2374, f511, f516;
sub.f32 f637, f511, f516;
mul.f32 f638, f637, 0f3F5DB3D7;
add.f32 f639, f638, f636;
sub.f32 f640, f636, f638;
add.f32 f2373, f192, f2374;
mul.f32 f641, f2374, 0f3F000000;
sub.f32 f642, f192, f641;
sub.f32 f643, f509, f514;
mul.f32 f644, f643, 0f3F5DB3D7;
sub.f32 f645, f642, f644;
add.f32 f646, f644, f642;
add.f32 f647, f519, f524;
add.f32 f648, f202, f647;
mul.f32 f651, f647, 0f3F000000;
sub.f32 f652, f202, f651;
add.f32 f2372, f521, f526;
sub.f32 f653, f521, f526;
mul.f32 f654, f653, 0f3F5DB3D7;
add.f32 f655, f654, f652;
sub.f32 f656, f652, f654;
add.f32 f2371, f208, f2372;
mul.f32 f657, f2372, 0f3F000000;
sub.f32 f658, f208, f657;
sub.f32 f659, f519, f524;
mul.f32 f660, f659, 0f3F5DB3D7;
sub.f32 f661, f658, f660;
add.f32 f662, f660, f658;
add.f32 f663, f529, f534;
add.f32 f664, f218, f663;
mul.f32 f667, f663, 0f3F000000;
sub.f32 f668, f218, f667;
add.f32 f2370, f531, f536;
sub.f32 f669, f531, f536;
mul.f32 f670, f669, 0f3F5DB3D7;
add.f32 f671, f670, f668;
sub.f32 f672, f668, f670;
add.f32 f2369, f224, f2370;
mul.f32 f673, f2370, 0f3F000000;
sub.f32 f674, f224, f673;
sub.f32 f675, f529, f534;
mul.f32 f676, f675, 0f3F5DB3D7;
sub.f32 f677, f674, f676;
add.f32 f678, f676, f674;
mov.u32 r22, %tid.x;
mul.wide.u32 rd2, r22, -901412889;
shr.u64 rd3, rd2, 38;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 81;
sub.s32 r7, r22, r6;
mad.lo.s32 r8, r5, 17496, r3;
mul.wide.u32 rd14, r7, 8;
mov.u64 rd15, %55;
add.s64 rd6, rd15, rd14;
ld.global.v2.f32 {f679, f680}, [rd6];
mul.f32 f684, f680, f2383;
mul.f32 f685, f679, f2383;
mul.f32 f2367, f679, f679;
mul.f32 f2368, f680, f680;
sub.f32 f688, f2367, f2368;
mul.f32 f689, f680, f679;
fma.rn.f32 f690, f680, f679, f689;
mul.f32 f692, f690, f2381;
mul.f32 f693, f688, f2381;
mul.f32 f695, f680, f690;
mul.f32 f2366, f679, f688;
sub.f32 f696, f2366, f695;
mul.f32 f2365, f688, f568;
mul.f32 f697, f679, f690;
fma.rn.f32 f698, f680, f688, f697;
mul.f32 f700, f698, f2379;
mul.f32 f701, f696, f2379;
mul.f32 f2363, f679, f696;
mul.f32 f2364, f680, f698;
sub.f32 f704, f2363, f2364;
mul.f32 f2362, f696, f584;
mul.f32 f705, f679, f698;
fma.rn.f32 f706, f680, f696, f705;
mul.f32 f708, f706, f2377;
mul.f32 f709, f704, f2377;
mul.f32 f711, f680, f706;
mul.f32 f2361, f679, f704;
sub.f32 f712, f2361, f711;
mul.f32 f2360, f704, f600;
mul.f32 f713, f679, f706;
fma.rn.f32 f714, f680, f704, f713;
mul.f32 f716, f714, f2375;
mul.f32 f717, f712, f2375;
mul.f32 f719, f680, f714;
mul.f32 f2359, f679, f712;
sub.f32 f720, f2359, f719;
mul.f32 f2358, f712, f616;
mul.f32 f721, f679, f714;
fma.rn.f32 f722, f680, f712, f721;
mul.f32 f724, f722, f2373;
mul.f32 f725, f720, f2373;
mul.f32 f2356, f679, f720;
mul.f32 f2357, f680, f722;
sub.f32 f728, f2356, f2357;
mul.f32 f2355, f720, f632;
mul.f32 f729, f679, f722;
fma.rn.f32 f730, f680, f720, f729;
mul.f32 f732, f730, f2371;
mul.f32 f733, f728, f2371;
mul.f32 f735, f680, f730;
mul.f32 f2354, f679, f728;
sub.f32 f736, f2354, f735;
mul.f32 f2353, f728, f648;
mul.f32 f737, f679, f730;
fma.rn.f32 f738, f680, f728, f737;
mul.f32 f740, f738, f2369;
mul.f32 f741, f736, f2369;
mul.f32 f743, f680, f738;
mul.f32 f2352, f679, f736;
sub.f32 f744, f2352, f743;
mul.f32 f2351, f736, f664;
mul.f32 f745, f679, f738;
fma.rn.f32 f746, f680, f736, f745;
mul.f32 f748, f746, f549;
mul.f32 f749, f744, f549;
mul.f32 f2349, f679, f744;
mul.f32 f2350, f680, f746;
sub.f32 f752, f2349, f2350;
mul.f32 f2348, f744, f543;
mul.f32 f753, f679, f746;
fma.rn.f32 f754, f680, f744, f753;
mul.f32 f756, f754, f565;
mul.f32 f757, f752, f565;
mul.f32 f759, f680, f754;
mul.f32 f2347, f679, f752;
sub.f32 f760, f2347, f759;
mul.f32 f2346, f752, f559;
mul.f32 f761, f679, f754;
fma.rn.f32 f762, f680, f752, f761;
mul.f32 f764, f762, f581;
mul.f32 f765, f760, f581;
mul.f32 f2344, f679, f760;
mul.f32 f2345, f680, f762;
sub.f32 f768, f2344, f2345;
mul.f32 f2343, f760, f575;
mul.f32 f769, f679, f762;
fma.rn.f32 f770, f680, f760, f769;
mul.f32 f772, f770, f597;
mul.f32 f773, f768, f597;
mul.f32 f775, f680, f770;
mul.f32 f2342, f679, f768;
sub.f32 f776, f2342, f775;
mul.f32 f2341, f768, f591;
mul.f32 f777, f679, f770;
fma.rn.f32 f778, f680, f768, f777;
mul.f32 f780, f778, f613;
mul.f32 f781, f776, f613;
mul.f32 f783, f680, f778;
mul.f32 f2340, f679, f776;
sub.f32 f784, f2340, f783;
mul.f32 f2339, f776, f607;
mul.f32 f785, f679, f778;
fma.rn.f32 f786, f680, f776, f785;
mul.f32 f788, f786, f629;
mul.f32 f789, f784, f629;
mul.f32 f2337, f679, f784;
mul.f32 f2338, f680, f786;
sub.f32 f792, f2337, f2338;
mul.f32 f2336, f784, f623;
mul.f32 f793, f679, f786;
fma.rn.f32 f794, f680, f784, f793;
mul.f32 f796, f794, f645;
mul.f32 f797, f792, f645;
mul.f32 f799, f680, f794;
mul.f32 f2335, f679, f792;
sub.f32 f800, f2335, f799;
mul.f32 f2334, f792, f639;
mul.f32 f801, f679, f794;
fma.rn.f32 f802, f680, f792, f801;
mul.f32 f804, f802, f661;
mul.f32 f805, f800, f661;
mul.f32 f807, f680, f802;
mul.f32 f2333, f679, f800;
sub.f32 f808, f2333, f807;
mul.f32 f2332, f800, f655;
mul.f32 f809, f679, f802;
fma.rn.f32 f810, f680, f800, f809;
mul.f32 f812, f810, f677;
mul.f32 f813, f808, f677;
mul.f32 f2330, f679, f808;
mul.f32 f2331, f680, f810;
sub.f32 f816, f2330, f2331;
mul.f32 f2329, f808, f671;
mul.f32 f817, f679, f810;
fma.rn.f32 f818, f680, f808, f817;
mul.f32 f820, f818, f550;
mul.f32 f821, f816, f550;
mul.f32 f823, f680, f818;
mul.f32 f2328, f679, f816;
sub.f32 f824, f2328, f823;
mul.f32 f2327, f816, f544;
mul.f32 f825, f679, f818;
fma.rn.f32 f826, f680, f816, f825;
mul.f32 f828, f826, f566;
mul.f32 f829, f824, f566;
mul.f32 f2325, f679, f824;
mul.f32 f2326, f680, f826;
sub.f32 f832, f2325, f2326;
mul.f32 f2324, f824, f560;
mul.f32 f833, f679, f826;
fma.rn.f32 f834, f680, f824, f833;
mul.f32 f836, f834, f582;
mul.f32 f837, f832, f582;
mul.f32 f839, f680, f834;
mul.f32 f2323, f679, f832;
sub.f32 f840, f2323, f839;
mul.f32 f2322, f832, f576;
mul.f32 f841, f679, f834;
fma.rn.f32 f842, f680, f832, f841;
mul.f32 f844, f842, f598;
mul.f32 f845, f840, f598;
mul.f32 f847, f680, f842;
mul.f32 f2321, f679, f840;
sub.f32 f848, f2321, f847;
mul.f32 f2320, f840, f592;
mul.f32 f849, f679, f842;
fma.rn.f32 f850, f680, f840, f849;
mul.f32 f852, f850, f614;
mul.f32 f853, f848, f614;
mul.f32 f2318, f679, f848;
mul.f32 f2319, f680, f850;
sub.f32 f856, f2318, f2319;
mul.f32 f2317, f848, f608;
mul.f32 f857, f679, f850;
fma.rn.f32 f858, f680, f848, f857;
mul.f32 f860, f858, f630;
mul.f32 f861, f856, f630;
mul.f32 f863, f680, f858;
mul.f32 f2316, f679, f856;
sub.f32 f864, f2316, f863;
mul.f32 f2315, f856, f624;
mul.f32 f865, f679, f858;
fma.rn.f32 f866, f680, f856, f865;
mul.f32 f868, f866, f646;
mul.f32 f869, f864, f646;
mul.f32 f871, f680, f866;
mul.f32 f2314, f679, f864;
sub.f32 f872, f2314, f871;
mul.f32 f2313, f864, f640;
mul.f32 f873, f679, f866;
fma.rn.f32 f874, f680, f864, f873;
mul.f32 f876, f874, f662;
mul.f32 f877, f872, f662;
mul.f32 f2311, f679, f872;
mul.f32 f2312, f680, f874;
sub.f32 f880, f2311, f2312;
mul.f32 f2310, f679, f552;
mul.f32 f881, f679, f874;
mul.f32 f2309, f872, f656;
fma.rn.f32 f882, f680, f872, f881;
mul.f32 f883, f880, f672;
mul.f32 f884, f882, f678;
mul.f32 f885, f880, f678;
barrier.sync 0;
add.f32 f886, f2466, f2385;
add.f32 f887, f178, f537;
mad.lo.s32 r21, r7, 216, r8;
st.shared.v2.f32 [r21], {f887, f886};
fma.rn.f32 f888, f680, f552, f685;
sub.f32 f889, f2310, f684;
st.shared.v2.f32 [r21+8], {f889, f888};
fma.rn.f32 f890, f690, f568, f693;
sub.f32 f891, f2365, f692;
st.shared.v2.f32 [r21+16], {f891, f890};
fma.rn.f32 f892, f698, f584, f701;
sub.f32 f893, f2362, f700;
st.shared.v2.f32 [r21+24], {f893, f892};
fma.rn.f32 f894, f706, f600, f709;
sub.f32 f895, f2360, f708;
st.shared.v2.f32 [r21+32], {f895, f894};
fma.rn.f32 f896, f714, f616, f717;
sub.f32 f897, f2358, f716;
st.shared.v2.f32 [r21+40], {f897, f896};
fma.rn.f32 f898, f722, f632, f725;
sub.f32 f899, f2355, f724;
st.shared.v2.f32 [r21+48], {f899, f898};
sub.f32 f900, f2353, f732;
fma.rn.f32 f901, f730, f648, f733;
st.shared.v2.f32 [r21+56], {f900, f901};
fma.rn.f32 f902, f738, f664, f741;
sub.f32 f903, f2351, f740;
st.shared.v2.f32 [r21+64], {f903, f902};
fma.rn.f32 f904, f746, f543, f749;
sub.f32 f905, f2348, f748;
st.shared.v2.f32 [r21+72], {f905, f904};
fma.rn.f32 f906, f754, f559, f757;
sub.f32 f907, f2346, f756;
st.shared.v2.f32 [r21+80], {f907, f906};
fma.rn.f32 f908, f762, f575, f765;
sub.f32 f909, f2343, f764;
st.shared.v2.f32 [r21+88], {f909, f908};
fma.rn.f32 f910, f770, f591, f773;
sub.f32 f911, f2341, f772;
st.shared.v2.f32 [r21+96], {f911, f910};
fma.rn.f32 f912, f778, f607, f781;
sub.f32 f913, f2339, f780;
st.shared.v2.f32 [r21+104], {f913, f912};
fma.rn.f32 f914, f786, f623, f789;
sub.f32 f915, f2336, f788;
st.shared.v2.f32 [r21+112], {f915, f914};
fma.rn.f32 f916, f794, f639, f797;
sub.f32 f917, f2334, f796;
st.shared.v2.f32 [r21+120], {f917, f916};
fma.rn.f32 f918, f802, f655, f805;
sub.f32 f919, f2332, f804;
st.shared.v2.f32 [r21+128], {f919, f918};
fma.rn.f32 f920, f810, f671, f813;
sub.f32 f921, f2329, f812;
st.shared.v2.f32 [r21+136], {f921, f920};
fma.rn.f32 f922, f818, f544, f821;
sub.f32 f923, f2327, f820;
st.shared.v2.f32 [r21+144], {f923, f922};
fma.rn.f32 f924, f826, f560, f829;
sub.f32 f925, f2324, f828;
st.shared.v2.f32 [r21+152], {f925, f924};
fma.rn.f32 f926, f834, f576, f837;
sub.f32 f927, f2322, f836;
st.shared.v2.f32 [r21+160], {f927, f926};
fma.rn.f32 f928, f842, f592, f845;
sub.f32 f929, f2320, f844;
st.shared.v2.f32 [r21+168], {f929, f928};
fma.rn.f32 f930, f850, f608, f853;
sub.f32 f931, f2317, f852;
st.shared.v2.f32 [r21+176], {f931, f930};
fma.rn.f32 f932, f858, f624, f861;
sub.f32 f933, f2315, f860;
st.shared.v2.f32 [r21+184], {f933, f932};
fma.rn.f32 f934, f866, f640, f869;
sub.f32 f935, f2313, f868;
st.shared.v2.f32 [r21+192], {f935, f934};
fma.rn.f32 f936, f874, f656, f877;
sub.f32 f937, f2309, f876;
st.shared.v2.f32 [r21+200], {f937, f936};
fma.rn.f32 f938, f882, f672, f885;
sub.f32 f939, f883, f884;
st.shared.v2.f32 [r21+208], {f939, f938};
barrier.sync 0;
mad.lo.s32 r10, r7, -208, r21;
ld.shared.v2.f32 {f940, f941}, [r10];
ld.shared.v2.f32 {f944, f945}, [r10+648];
ld.shared.v2.f32 {f948, f949}, [r10+1296];
ld.shared.v2.f32 {f952, f953}, [r10+1944];
ld.shared.v2.f32 {f956, f957}, [r10+2592];
ld.shared.v2.f32 {f960, f961}, [r10+3240];
ld.shared.v2.f32 {f964, f965}, [r10+3888];
ld.shared.v2.f32 {f968, f969}, [r10+4536];
ld.shared.v2.f32 {f972, f973}, [r10+5184];
ld.shared.v2.f32 {f976, f977}, [r10+5832];
ld.shared.v2.f32 {f980, f981}, [r10+6480];
ld.shared.v2.f32 {f984, f985}, [r10+7128];
ld.shared.v2.f32 {f988, f989}, [r10+7776];
ld.shared.v2.f32 {f992, f993}, [r10+8424];
ld.shared.v2.f32 {f996, f997}, [r10+9072];
ld.shared.v2.f32 {f1000, f1001}, [r10+9720];
ld.shared.v2.f32 {f1004, f1005}, [r10+10368];
ld.shared.v2.f32 {f1008, f1009}, [r10+11016];
ld.shared.v2.f32 {f1012, f1013}, [r10+11664];
ld.shared.v2.f32 {f1016, f1017}, [r10+12312];
ld.shared.v2.f32 {f1020, f1021}, [r10+12960];
ld.shared.v2.f32 {f1024, f1025}, [r10+13608];
ld.shared.v2.f32 {f1028, f1029}, [r10+14256];
ld.shared.v2.f32 {f1032, f1033}, [r10+14904];
ld.shared.v2.f32 {f1036, f1037}, [r10+15552];
ld.shared.v2.f32 {f1040, f1041}, [r10+16200];
ld.shared.v2.f32 {f1044, f1045}, [r10+16848];
add.f32 f1048, f976, f1012;
add.f32 f1049, f940, f1048;
mul.f32 f1052, f1048, 0f3F000000;
sub.f32 f1053, f940, f1052;
add.f32 f2308, f977, f1013;
sub.f32 f1054, f977, f1013;
mul.f32 f1055, f1054, 0f3F5DB3D7;
add.f32 f1056, f1055, f1053;
sub.f32 f1057, f1053, f1055;
add.f32 f2307, f941, f2308;
mul.f32 f1058, f2308, 0f3F000000;
sub.f32 f1059, f941, f1058;
sub.f32 f1060, f976, f1012;
mul.f32 f1061, f1060, 0f3F5DB3D7;
sub.f32 f1062, f1059, f1061;
add.f32 f1063, f1061, f1059;
add.f32 f1064, f988, f1024;
add.f32 f1065, f952, f1064;
mul.f32 f1068, f1064, 0f3F000000;
sub.f32 f1069, f952, f1068;
add.f32 f2306, f989, f1025;
sub.f32 f1070, f989, f1025;
mul.f32 f1071, f1070, 0f3F5DB3D7;
add.f32 f1072, f1071, f1069;
sub.f32 f1073, f1069, f1071;
add.f32 f2305, f953, f2306;
mul.f32 f1074, f2306, 0f3F000000;
sub.f32 f1075, f953, f1074;
sub.f32 f1076, f988, f1024;
mul.f32 f1077, f1076, 0f3F5DB3D7;
sub.f32 f1078, f1075, f1077;
add.f32 f1079, f1077, f1075;
add.f32 f1080, f1000, f1036;
add.f32 f1081, f964, f1080;
mul.f32 f1084, f1080, 0f3F000000;
sub.f32 f1085, f964, f1084;
add.f32 f2304, f1001, f1037;
sub.f32 f1086, f1001, f1037;
mul.f32 f1087, f1086, 0f3F5DB3D7;
add.f32 f1088, f1087, f1085;
sub.f32 f1089, f1085, f1087;
add.f32 f2303, f965, f2304;
mul.f32 f1090, f2304, 0f3F000000;
sub.f32 f1091, f965, f1090;
sub.f32 f1092, f1000, f1036;
mul.f32 f1093, f1092, 0f3F5DB3D7;
sub.f32 f1094, f1091, f1093;
add.f32 f1095, f1093, f1091;
mul.f32 f2301, f1072, 0f3F441B7D;
mul.f32 f2302, f1078, 0fBF248DBB;
sub.f32 f1098, f2301, f2302;
mul.f32 f1099, f1078, 0f3F441B7D;
fma.rn.f32 f1100, f1072, 0fBF248DBB, f1099;
mul.f32 f2299, f1088, 0f3E31D0D4;
mul.f32 f2300, f1094, 0fBF7C1C5C;
sub.f32 f1103, f2299, f2300;
mul.f32 f1104, f1094, 0f3E31D0D4;
fma.rn.f32 f1105, f1088, 0fBF7C1C5C, f1104;
mul.f32 f2297, f1073, 0f3E31D0D4;
mul.f32 f2298, f1079, 0fBF7C1C5C;
sub.f32 f1108, f2297, f2298;
mul.f32 f1109, f1079, 0f3E31D0D4;
fma.rn.f32 f1110, f1073, 0fBF7C1C5C, f1109;
mul.f32 f1112, f1095, 0fBEAF1D44;
mul.f32 f2296, f1089, 0fBF708FB2;
sub.f32 f1113, f2296, f1112;
mul.f32 f1114, f1095, 0fBF708FB2;
fma.rn.f32 f1115, f1089, 0fBEAF1D44, f1114;
add.f32 f1116, f1065, f1081;
add.f32 f1117, f1049, f1116;
mul.f32 f1120, f1116, 0f3F000000;
sub.f32 f1121, f1049, f1120;
add.f32 f2295, f2305, f2303;
sub.f32 f1122, f2305, f2303;
mul.f32 f1123, f1122, 0f3F5DB3D7;
add.f32 f1124, f1123, f1121;
sub.f32 f1125, f1121, f1123;
add.f32 f2294, f2307, f2295;
mul.f32 f1126, f2295, 0f3F000000;
sub.f32 f1127, f2307, f1126;
sub.f32 f1128, f1065, f1081;
mul.f32 f1129, f1128, 0f3F5DB3D7;
sub.f32 f1130, f1127, f1129;
add.f32 f1131, f1129, f1127;
add.f32 f1132, f1098, f1103;
add.f32 f1133, f1056, f1132;
mul.f32 f1136, f1132, 0f3F000000;
sub.f32 f1137, f1056, f1136;
add.f32 f2293, f1100, f1105;
sub.f32 f1138, f1100, f1105;
mul.f32 f1139, f1138, 0f3F5DB3D7;
add.f32 f1140, f1139, f1137;
sub.f32 f1141, f1137, f1139;
add.f32 f2292, f1062, f2293;
mul.f32 f1142, f2293, 0f3F000000;
sub.f32 f1143, f1062, f1142;
sub.f32 f1144, f1098, f1103;
mul.f32 f1145, f1144, 0f3F5DB3D7;
sub.f32 f1146, f1143, f1145;
add.f32 f1147, f1145, f1143;
add.f32 f1148, f1108, f1113;
add.f32 f1149, f1057, f1148;
mul.f32 f1152, f1148, 0f3F000000;
sub.f32 f1153, f1057, f1152;
add.f32 f2291, f1110, f1115;
sub.f32 f1154, f1110, f1115;
mul.f32 f1155, f1154, 0f3F5DB3D7;
add.f32 f1156, f1155, f1153;
sub.f32 f1157, f1153, f1155;
add.f32 f2290, f1063, f2291;
mul.f32 f1158, f2291, 0f3F000000;
sub.f32 f1159, f1063, f1158;
sub.f32 f1160, f1108, f1113;
mul.f32 f1161, f1160, 0f3F5DB3D7;
sub.f32 f1162, f1159, f1161;
add.f32 f1163, f1161, f1159;
add.f32 f1164, f980, f1016;
add.f32 f1165, f944, f1164;
mul.f32 f1168, f1164, 0f3F000000;
sub.f32 f1169, f944, f1168;
add.f32 f2289, f981, f1017;
sub.f32 f1170, f981, f1017;
mul.f32 f1171, f1170, 0f3F5DB3D7;
add.f32 f1172, f1171, f1169;
sub.f32 f1173, f1169, f1171;
add.f32 f2288, f945, f2289;
mul.f32 f1174, f2289, 0f3F000000;
sub.f32 f1175, f945, f1174;
sub.f32 f1176, f980, f1016;
mul.f32 f1177, f1176, 0f3F5DB3D7;
sub.f32 f1178, f1175, f1177;
add.f32 f1179, f1177, f1175;
add.f32 f1180, f992, f1028;
add.f32 f1181, f956, f1180;
mul.f32 f1184, f1180, 0f3F000000;
sub.f32 f1185, f956, f1184;
add.f32 f2287, f993, f1029;
sub.f32 f1186, f993, f1029;
mul.f32 f1187, f1186, 0f3F5DB3D7;
add.f32 f1188, f1187, f1185;
sub.f32 f1189, f1185, f1187;
add.f32 f2286, f957, f2287;
mul.f32 f1190, f2287, 0f3F000000;
sub.f32 f1191, f957, f1190;
sub.f32 f1192, f992, f1028;
mul.f32 f1193, f1192, 0f3F5DB3D7;
sub.f32 f1194, f1191, f1193;
add.f32 f1195, f1193, f1191;
add.f32 f1196, f1004, f1040;
add.f32 f1197, f968, f1196;
mul.f32 f1200, f1196, 0f3F000000;
sub.f32 f1201, f968, f1200;
add.f32 f2285, f1005, f1041;
sub.f32 f1202, f1005, f1041;
mul.f32 f1203, f1202, 0f3F5DB3D7;
add.f32 f1204, f1203, f1201;
sub.f32 f1205, f1201, f1203;
add.f32 f2284, f969, f2285;
mul.f32 f1206, f2285, 0f3F000000;
sub.f32 f1207, f969, f1206;
sub.f32 f1208, f1004, f1040;
mul.f32 f1209, f1208, 0f3F5DB3D7;
sub.f32 f1210, f1207, f1209;
add.f32 f1211, f1209, f1207;
mul.f32 f2282, f1188, 0f3F441B7D;
mul.f32 f2283, f1194, 0fBF248DBB;
sub.f32 f1214, f2282, f2283;
mul.f32 f1215, f1194, 0f3F441B7D;
fma.rn.f32 f1216, f1188, 0fBF248DBB, f1215;
mul.f32 f2280, f1204, 0f3E31D0D4;
mul.f32 f2281, f1210, 0fBF7C1C5C;
sub.f32 f1219, f2280, f2281;
mul.f32 f1220, f1210, 0f3E31D0D4;
fma.rn.f32 f1221, f1204, 0fBF7C1C5C, f1220;
mul.f32 f2278, f1189, 0f3E31D0D4;
mul.f32 f2279, f1195, 0fBF7C1C5C;
sub.f32 f1224, f2278, f2279;
mul.f32 f1225, f1195, 0f3E31D0D4;
fma.rn.f32 f1226, f1189, 0fBF7C1C5C, f1225;
mul.f32 f2276, f1205, 0fBF708FB2;
mul.f32 f2277, f1211, 0fBEAF1D44;
sub.f32 f1229, f2276, f2277;
mul.f32 f1230, f1211, 0fBF708FB2;
fma.rn.f32 f1231, f1205, 0fBEAF1D44, f1230;
add.f32 f1232, f1181, f1197;
add.f32 f1233, f1165, f1232;
mul.f32 f1236, f1232, 0f3F000000;
sub.f32 f1237, f1165, f1236;
add.f32 f2275, f2286, f2284;
sub.f32 f1238, f2286, f2284;
mul.f32 f1239, f1238, 0f3F5DB3D7;
add.f32 f1240, f1239, f1237;
sub.f32 f1241, f1237, f1239;
add.f32 f2274, f2288, f2275;
mul.f32 f1242, f2275, 0f3F000000;
sub.f32 f1243, f2288, f1242;
sub.f32 f1244, f1181, f1197;
mul.f32 f1245, f1244, 0f3F5DB3D7;
sub.f32 f1246, f1243, f1245;
add.f32 f1247, f1245, f1243;
add.f32 f1248, f1214, f1219;
add.f32 f1249, f1172, f1248;
mul.f32 f1252, f1248, 0f3F000000;
sub.f32 f1253, f1172, f1252;
add.f32 f2273, f1216, f1221;
sub.f32 f1254, f1216, f1221;
mul.f32 f1255, f1254, 0f3F5DB3D7;
add.f32 f1256, f1255, f1253;
sub.f32 f1257, f1253, f1255;
add.f32 f2272, f1178, f2273;
mul.f32 f1258, f2273, 0f3F000000;
sub.f32 f1259, f1178, f1258;
sub.f32 f1260, f1214, f1219;
mul.f32 f1261, f1260, 0f3F5DB3D7;
sub.f32 f1262, f1259, f1261;
add.f32 f1263, f1261, f1259;
add.f32 f1264, f1224, f1229;
add.f32 f1265, f1173, f1264;
mul.f32 f1268, f1264, 0f3F000000;
sub.f32 f1269, f1173, f1268;
add.f32 f2271, f1226, f1231;
sub.f32 f1270, f1226, f1231;
mul.f32 f1271, f1270, 0f3F5DB3D7;
add.f32 f1272, f1271, f1269;
sub.f32 f1273, f1269, f1271;
add.f32 f2270, f1179, f2271;
mul.f32 f1274, f2271, 0f3F000000;
sub.f32 f1275, f1179, f1274;
sub.f32 f1276, f1224, f1229;
mul.f32 f1277, f1276, 0f3F5DB3D7;
sub.f32 f1278, f1275, f1277;
add.f32 f1279, f1277, f1275;
add.f32 f1280, f984, f1020;
add.f32 f1281, f948, f1280;
mul.f32 f1284, f1280, 0f3F000000;
sub.f32 f1285, f948, f1284;
add.f32 f2269, f985, f1021;
sub.f32 f1286, f985, f1021;
mul.f32 f1287, f1286, 0f3F5DB3D7;
add.f32 f1288, f1287, f1285;
sub.f32 f1289, f1285, f1287;
add.f32 f2268, f949, f2269;
mul.f32 f1290, f2269, 0f3F000000;
sub.f32 f1291, f949, f1290;
sub.f32 f1292, f984, f1020;
mul.f32 f1293, f1292, 0f3F5DB3D7;
sub.f32 f1294, f1291, f1293;
add.f32 f1295, f1293, f1291;
add.f32 f1296, f996, f1032;
add.f32 f1297, f960, f1296;
mul.f32 f1300, f1296, 0f3F000000;
sub.f32 f1301, f960, f1300;
add.f32 f2267, f997, f1033;
sub.f32 f1302, f997, f1033;
mul.f32 f1303, f1302, 0f3F5DB3D7;
add.f32 f1304, f1303, f1301;
sub.f32 f1305, f1301, f1303;
add.f32 f2266, f961, f2267;
mul.f32 f1306, f2267, 0f3F000000;
sub.f32 f1307, f961, f1306;
sub.f32 f1308, f996, f1032;
mul.f32 f1309, f1308, 0f3F5DB3D7;
sub.f32 f1310, f1307, f1309;
add.f32 f1311, f1309, f1307;
add.f32 f1312, f1008, f1044;
add.f32 f1313, f972, f1312;
mul.f32 f1316, f1312, 0f3F000000;
sub.f32 f1317, f972, f1316;
add.f32 f2265, f1009, f1045;
sub.f32 f1318, f1009, f1045;
mul.f32 f1319, f1318, 0f3F5DB3D7;
add.f32 f1320, f1319, f1317;
sub.f32 f1321, f1317, f1319;
add.f32 f2264, f973, f2265;
mul.f32 f1322, f2265, 0f3F000000;
sub.f32 f1323, f973, f1322;
sub.f32 f1324, f1008, f1044;
mul.f32 f1325, f1324, 0f3F5DB3D7;
sub.f32 f1326, f1323, f1325;
add.f32 f1327, f1325, f1323;
mul.f32 f1329, f1310, 0fBF248DBB;
mul.f32 f2263, f1304, 0f3F441B7D;
sub.f32 f1330, f2263, f1329;
mul.f32 f1331, f1310, 0f3F441B7D;
fma.rn.f32 f1332, f1304, 0fBF248DBB, f1331;
mul.f32 f2261, f1320, 0f3E31D0D4;
mul.f32 f2262, f1326, 0fBF7C1C5C;
sub.f32 f1335, f2261, f2262;
mul.f32 f1336, f1326, 0f3E31D0D4;
fma.rn.f32 f1337, f1320, 0fBF7C1C5C, f1336;
mul.f32 f2259, f1305, 0f3E31D0D4;
mul.f32 f2260, f1311, 0fBF7C1C5C;
sub.f32 f1340, f2259, f2260;
mul.f32 f1341, f1311, 0f3E31D0D4;
fma.rn.f32 f1342, f1305, 0fBF7C1C5C, f1341;
mul.f32 f2257, f1321, 0fBF708FB2;
mul.f32 f2258, f1327, 0fBEAF1D44;
sub.f32 f1345, f2257, f2258;
mul.f32 f1346, f1327, 0fBF708FB2;
fma.rn.f32 f1347, f1321, 0fBEAF1D44, f1346;
add.f32 f1348, f1297, f1313;
add.f32 f1349, f1281, f1348;
mul.f32 f1352, f1348, 0f3F000000;
sub.f32 f1353, f1281, f1352;
add.f32 f2256, f2266, f2264;
sub.f32 f1354, f2266, f2264;
mul.f32 f1355, f1354, 0f3F5DB3D7;
add.f32 f1356, f1355, f1353;
sub.f32 f1357, f1353, f1355;
add.f32 f2255, f2268, f2256;
mul.f32 f1358, f2256, 0f3F000000;
sub.f32 f1359, f2268, f1358;
sub.f32 f1360, f1297, f1313;
mul.f32 f1361, f1360, 0f3F5DB3D7;
sub.f32 f1362, f1359, f1361;
add.f32 f1363, f1361, f1359;
add.f32 f1364, f1330, f1335;
add.f32 f1365, f1288, f1364;
mul.f32 f1368, f1364, 0f3F000000;
sub.f32 f1369, f1288, f1368;
add.f32 f2254, f1332, f1337;
sub.f32 f1370, f1332, f1337;
mul.f32 f1371, f1370, 0f3F5DB3D7;
add.f32 f1372, f1371, f1369;
sub.f32 f1373, f1369, f1371;
add.f32 f2253, f1294, f2254;
mul.f32 f1374, f2254, 0f3F000000;
sub.f32 f1375, f1294, f1374;
sub.f32 f1376, f1330, f1335;
mul.f32 f1377, f1376, 0f3F5DB3D7;
sub.f32 f1378, f1375, f1377;
add.f32 f1379, f1377, f1375;
add.f32 f1380, f1340, f1345;
add.f32 f1381, f1289, f1380;
mul.f32 f1384, f1380, 0f3F000000;
sub.f32 f1385, f1289, f1384;
add.f32 f2252, f1342, f1347;
sub.f32 f1386, f1342, f1347;
mul.f32 f1387, f1386, 0f3F5DB3D7;
add.f32 f1388, f1387, f1385;
sub.f32 f1389, f1385, f1387;
add.f32 f2251, f1295, f2252;
mul.f32 f1390, f2252, 0f3F000000;
sub.f32 f1391, f1295, f1390;
sub.f32 f1392, f1340, f1345;
mul.f32 f1393, f1392, 0f3F5DB3D7;
sub.f32 f1394, f1391, f1393;
add.f32 f1395, f1393, f1391;
mul.f32 f1397, f2272, 0fBE6C2691;
mul.f32 f2250, f1249, 0f3F791978;
sub.f32 f1398, f2250, f1397;
mul.f32 f1399, f2272, 0f3F791978;
fma.rn.f32 f1400, f1249, 0fBE6C2691, f1399;
mul.f32 f1402, f2253, 0fBEE5C902;
mul.f32 f2249, f1365, 0f3F64C51C;
sub.f32 f1403, f2249, f1402;
mul.f32 f1404, f2253, 0f3F64C51C;
fma.rn.f32 f1405, f1365, 0fBEE5C902, f1404;
mul.f32 f1407, f2270, 0fBEE5C902;
mul.f32 f2248, f1265, 0f3F64C51C;
sub.f32 f1408, f2248, f1407;
mul.f32 f1409, f2270, 0f3F64C51C;
fma.rn.f32 f1410, f1265, 0fBEE5C902, f1409;
mul.f32 f2246, f1381, 0f3F18DF63;
mul.f32 f2247, f2251, 0fBF4D57F2;
sub.f32 f1413, f2246, f2247;
mul.f32 f1414, f2251, 0f3F18DF63;
fma.rn.f32 f1415, f1381, 0fBF4D57F2, f1414;
mul.f32 f2244, f1240, 0f3F441B7D;
mul.f32 f2245, f1246, 0fBF248DBB;
sub.f32 f1418, f2244, f2245;
mul.f32 f1419, f1246, 0f3F441B7D;
fma.rn.f32 f1420, f1240, 0fBF248DBB, f1419;
mul.f32 f2242, f1356, 0f3E31D0D4;
mul.f32 f2243, f1362, 0fBF7C1C5C;
sub.f32 f1423, f2242, f2243;
mul.f32 f1424, f1362, 0f3E31D0D4;
fma.rn.f32 f1425, f1356, 0fBF7C1C5C, f1424;
mul.f32 f2240, f1256, 0f3F18DF63;
mul.f32 f2241, f1262, 0fBF4D57F2;
sub.f32 f1428, f2240, f2241;
mul.f32 f1429, f1262, 0f3F18DF63;
fma.rn.f32 f1430, f1256, 0fBF4D57F2, f1429;
mul.f32 f1432, f1378, 0fBF753ECD;
mul.f32 f2239, f1372, 0fBE92D7E0;
sub.f32 f1433, f2239, f1432;
mul.f32 f1434, f1378, 0fBE92D7E0;
fma.rn.f32 f1435, f1372, 0fBF753ECD, f1434;
mul.f32 f1437, f1278, 0fBF6B1036;
mul.f32 f2238, f1272, 0f3ECACAF8;
sub.f32 f1438, f2238, f1437;
mul.f32 f1439, f1278, 0f3ECACAF8;
fma.rn.f32 f1440, f1272, 0fBF6B1036, f1439;
mul.f32 f1442, f1394, 0fBF3A3529;
mul.f32 f2237, f1388, 0fBF2FAD88;
sub.f32 f1443, f2237, f1442;
mul.f32 f1444, f1394, 0fBF2FAD88;
fma.rn.f32 f1445, f1388, 0fBF3A3529, f1444;
mul.f32 f1447, f1247, 0fBF7C1C5C;
mul.f32 f2236, f1241, 0f3E31D0D4;
sub.f32 f1448, f2236, f1447;
mul.f32 f1449, f1247, 0f3E31D0D4;
fma.rn.f32 f1450, f1241, 0fBF7C1C5C, f1449;
mul.f32 f1452, f1363, 0fBEAF1D44;
mul.f32 f2235, f1357, 0fBF708FB2;
sub.f32 f1453, f2235, f1452;
mul.f32 f1454, f1363, 0fBF708FB2;
fma.rn.f32 f1455, f1357, 0fBEAF1D44, f1454;
mul.f32 f1457, f1263, 0fBF7F9120;
mul.f32 f2234, f1257, 0fBD6E2946;
sub.f32 f1458, f2234, f1457;
mul.f32 f1459, f1263, 0fBD6E2946;
fma.rn.f32 f1460, f1257, 0fBF7F9120, f1459;
mul.f32 f2232, f1373, 0fBF7E44DE;
mul.f32 f2233, f1379, 0f3DEDC21F;
sub.f32 f1463, f2232, f2233;
mul.f32 f1464, f1379, 0fBF7E44DE;
fma.rn.f32 f1465, f1373, 0f3DEDC21F, f1464;
mul.f32 f2230, f1273, 0fBE92D7E0;
mul.f32 f2231, f1279, 0fBF753ECD;
sub.f32 f1468, f2230, f2231;
mul.f32 f1469, f1279, 0fBE92D7E0;
fma.rn.f32 f1470, f1273, 0fBF753ECD, f1469;
mul.f32 f2228, f1389, 0fBF55E287;
mul.f32 f2229, f1395, 0f3F0CAC9F;
sub.f32 f1473, f2228, f2229;
mul.f32 f1474, f1395, 0fBF55E287;
fma.rn.f32 f1475, f1389, 0f3F0CAC9F, f1474;
add.f32 f1476, f1233, f1349;
mul.f32 f1478, f1476, 0f3F000000;
sub.f32 f1479, f1117, f1478;
add.f32 f2227, f2274, f2255;
sub.f32 f1480, f2274, f2255;
mul.f32 f1481, f1480, 0f3F5DB3D7;
add.f32 f1482, f1481, f1479;
sub.f32 f1483, f1479, f1481;
mul.f32 f1484, f2227, 0f3F000000;
sub.f32 f1485, f2294, f1484;
sub.f32 f1486, f1233, f1349;
mul.f32 f1487, f1486, 0f3F5DB3D7;
sub.f32 f1488, f1485, f1487;
add.f32 f1489, f1487, f1485;
add.f32 f1490, f1398, f1403;
add.f32 f1491, f1133, f1490;
mul.f32 f1494, f1490, 0f3F000000;
sub.f32 f1495, f1133, f1494;
add.f32 f2226, f1400, f1405;
sub.f32 f1496, f1400, f1405;
mul.f32 f1497, f1496, 0f3F5DB3D7;
add.f32 f1498, f1497, f1495;
sub.f32 f1499, f1495, f1497;
add.f32 f2225, f2292, f2226;
mul.f32 f1500, f2226, 0f3F000000;
sub.f32 f1501, f2292, f1500;
sub.f32 f1502, f1398, f1403;
mul.f32 f1503, f1502, 0f3F5DB3D7;
sub.f32 f1504, f1501, f1503;
add.f32 f1505, f1503, f1501;
add.f32 f1506, f1408, f1413;
add.f32 f1507, f1149, f1506;
mul.f32 f1510, f1506, 0f3F000000;
sub.f32 f1511, f1149, f1510;
add.f32 f2224, f1410, f1415;
sub.f32 f1512, f1410, f1415;
mul.f32 f1513, f1512, 0f3F5DB3D7;
add.f32 f1514, f1513, f1511;
sub.f32 f1515, f1511, f1513;
add.f32 f2223, f2290, f2224;
mul.f32 f1516, f2224, 0f3F000000;
sub.f32 f1517, f2290, f1516;
sub.f32 f1518, f1408, f1413;
mul.f32 f1519, f1518, 0f3F5DB3D7;
sub.f32 f1520, f1517, f1519;
add.f32 f1521, f1519, f1517;
add.f32 f1522, f1418, f1423;
add.f32 f1523, f1124, f1522;
mul.f32 f1526, f1522, 0f3F000000;
sub.f32 f1527, f1124, f1526;
add.f32 f2222, f1420, f1425;
sub.f32 f1528, f1420, f1425;
mul.f32 f1529, f1528, 0f3F5DB3D7;
add.f32 f1530, f1529, f1527;
sub.f32 f1531, f1527, f1529;
add.f32 f2221, f1130, f2222;
mul.f32 f1532, f2222, 0f3F000000;
sub.f32 f1533, f1130, f1532;
sub.f32 f1534, f1418, f1423;
mul.f32 f1535, f1534, 0f3F5DB3D7;
sub.f32 f1536, f1533, f1535;
add.f32 f1537, f1535, f1533;
add.f32 f1538, f1428, f1433;
add.f32 f1539, f1140, f1538;
mul.f32 f1542, f1538, 0f3F000000;
sub.f32 f1543, f1140, f1542;
add.f32 f2220, f1430, f1435;
sub.f32 f1544, f1430, f1435;
mul.f32 f1545, f1544, 0f3F5DB3D7;
add.f32 f1546, f1545, f1543;
sub.f32 f1547, f1543, f1545;
add.f32 f2219, f1146, f2220;
mul.f32 f1548, f2220, 0f3F000000;
sub.f32 f1549, f1146, f1548;
sub.f32 f1550, f1428, f1433;
mul.f32 f1551, f1550, 0f3F5DB3D7;
sub.f32 f1552, f1549, f1551;
add.f32 f1553, f1551, f1549;
add.f32 f1554, f1438, f1443;
add.f32 f1555, f1156, f1554;
mul.f32 f1558, f1554, 0f3F000000;
sub.f32 f1559, f1156, f1558;
add.f32 f2218, f1440, f1445;
sub.f32 f1560, f1440, f1445;
mul.f32 f1561, f1560, 0f3F5DB3D7;
add.f32 f1562, f1561, f1559;
sub.f32 f1563, f1559, f1561;
add.f32 f2217, f1162, f2218;
mul.f32 f1564, f2218, 0f3F000000;
sub.f32 f1565, f1162, f1564;
sub.f32 f1566, f1438, f1443;
mul.f32 f1567, f1566, 0f3F5DB3D7;
sub.f32 f1568, f1565, f1567;
add.f32 f1569, f1567, f1565;
add.f32 f1570, f1448, f1453;
add.f32 f1571, f1125, f1570;
mul.f32 f1574, f1570, 0f3F000000;
sub.f32 f1575, f1125, f1574;
add.f32 f2216, f1450, f1455;
sub.f32 f1576, f1450, f1455;
mul.f32 f1577, f1576, 0f3F5DB3D7;
add.f32 f1578, f1577, f1575;
sub.f32 f1579, f1575, f1577;
add.f32 f2215, f1131, f2216;
mul.f32 f1580, f2216, 0f3F000000;
sub.f32 f1581, f1131, f1580;
sub.f32 f1582, f1448, f1453;
mul.f32 f1583, f1582, 0f3F5DB3D7;
sub.f32 f1584, f1581, f1583;
add.f32 f1585, f1583, f1581;
add.f32 f1586, f1458, f1463;
add.f32 f1587, f1141, f1586;
mul.f32 f1590, f1586, 0f3F000000;
sub.f32 f1591, f1141, f1590;
add.f32 f2214, f1460, f1465;
sub.f32 f1592, f1460, f1465;
mul.f32 f1593, f1592, 0f3F5DB3D7;
add.f32 f1594, f1593, f1591;
sub.f32 f1595, f1591, f1593;
add.f32 f2213, f1147, f2214;
mul.f32 f1596, f2214, 0f3F000000;
sub.f32 f1597, f1147, f1596;
sub.f32 f1598, f1458, f1463;
mul.f32 f1599, f1598, 0f3F5DB3D7;
sub.f32 f1600, f1597, f1599;
add.f32 f1601, f1599, f1597;
add.f32 f1602, f1468, f1473;
add.f32 f1603, f1157, f1602;
mul.f32 f1606, f1602, 0f3F000000;
sub.f32 f1607, f1157, f1606;
add.f32 f2212, f1470, f1475;
sub.f32 f1608, f1470, f1475;
mul.f32 f1609, f1608, 0f3F5DB3D7;
add.f32 f1610, f1609, f1607;
sub.f32 f1611, f1607, f1609;
add.f32 f2211, f1163, f2212;
mul.f32 f1612, f2212, 0f3F000000;
sub.f32 f1613, f1163, f1612;
sub.f32 f1614, f1468, f1473;
mul.f32 f1615, f1614, 0f3F5DB3D7;
sub.f32 f1616, f1613, f1615;
add.f32 f1617, f1615, f1613;
mul.wide.u32 rd7, r7, 795364315;
shr.u64 rd8, rd7, 32;
cvt.u32.u64 r11, rd8;
sub.s32 r12, r7, r11;
shr.u32 r13, r12, 1;
add.s32 r14, r13, r11;
shr.u32 r15, r14, 4;
mul.lo.s32 r16, r15, 27;
sub.s32 r17, r7, r16;
mul.wide.u32 rd12, r15, 8;
mov.u64 rd13, %56;
add.s64 rd11, rd13, rd12;
ld.global.v2.f32 {f1618, f1619}, [rd11];
mul.f32 f1623, f1619, f2225;
mul.f32 f1624, f1618, f2225;
mul.f32 f2209, f1618, f1618;
mul.f32 f2210, f1619, f1619;
sub.f32 f1627, f2209, f2210;
mul.f32 f1628, f1619, f1618;
fma.rn.f32 f1629, f1619, f1618, f1628;
mul.f32 f1631, f1629, f2223;
mul.f32 f1632, f1627, f2223;
mul.f32 f1634, f1619, f1629;
mul.f32 f2208, f1618, f1627;
sub.f32 f1635, f2208, f1634;
mul.f32 f2207, f1627, f1507;
mul.f32 f1636, f1618, f1629;
fma.rn.f32 f1637, f1619, f1627, f1636;
mul.f32 f1639, f1637, f2221;
mul.f32 f1640, f1635, f2221;
mul.f32 f2205, f1618, f1635;
mul.f32 f2206, f1619, f1637;
sub.f32 f1643, f2205, f2206;
mul.f32 f2204, f1635, f1523;
mul.f32 f1644, f1618, f1637;
fma.rn.f32 f1645, f1619, f1635, f1644;
mul.f32 f1647, f1645, f2219;
mul.f32 f1648, f1643, f2219;
mul.f32 f1650, f1619, f1645;
mul.f32 f2203, f1618, f1643;
sub.f32 f1651, f2203, f1650;
mul.f32 f2202, f1643, f1539;
mul.f32 f1652, f1618, f1645;
fma.rn.f32 f1653, f1619, f1643, f1652;
mul.f32 f1655, f1653, f2217;
mul.f32 f1656, f1651, f2217;
mul.f32 f1658, f1619, f1653;
mul.f32 f2201, f1618, f1651;
sub.f32 f1659, f2201, f1658;
mul.f32 f2200, f1651, f1555;
mul.f32 f1660, f1618, f1653;
fma.rn.f32 f1661, f1619, f1651, f1660;
mul.f32 f1663, f1661, f2215;
mul.f32 f1664, f1659, f2215;
mul.f32 f2198, f1618, f1659;
mul.f32 f2199, f1619, f1661;
sub.f32 f1667, f2198, f2199;
mul.f32 f2197, f1659, f1571;
mul.f32 f1668, f1618, f1661;
fma.rn.f32 f1669, f1619, f1659, f1668;
mul.f32 f1671, f1669, f2213;
mul.f32 f1672, f1667, f2213;
mul.f32 f1674, f1619, f1669;
mul.f32 f2196, f1618, f1667;
sub.f32 f1675, f2196, f1674;
mul.f32 f2195, f1667, f1587;
mul.f32 f1676, f1618, f1669;
fma.rn.f32 f1677, f1619, f1667, f1676;
mul.f32 f1679, f1677, f2211;
mul.f32 f1680, f1675, f2211;
mul.f32 f1682, f1619, f1677;
mul.f32 f2194, f1618, f1675;
sub.f32 f1683, f2194, f1682;
mul.f32 f2193, f1675, f1603;
mul.f32 f1684, f1618, f1677;
fma.rn.f32 f1685, f1619, f1675, f1684;
mul.f32 f1687, f1685, f1488;
mul.f32 f1688, f1683, f1488;
mul.f32 f2191, f1618, f1683;
mul.f32 f2192, f1619, f1685;
sub.f32 f1691, f2191, f2192;
mul.f32 f2190, f1683, f1482;
mul.f32 f1692, f1618, f1685;
fma.rn.f32 f1693, f1619, f1683, f1692;
mul.f32 f1695, f1693, f1504;
mul.f32 f1696, f1691, f1504;
mul.f32 f1698, f1619, f1693;
mul.f32 f2189, f1618, f1691;
sub.f32 f1699, f2189, f1698;
mul.f32 f2188, f1691, f1498;
mul.f32 f1700, f1618, f1693;
fma.rn.f32 f1701, f1619, f1691, f1700;
mul.f32 f1703, f1701, f1520;
mul.f32 f1704, f1699, f1520;
mul.f32 f2186, f1618, f1699;
mul.f32 f2187, f1619, f1701;
sub.f32 f1707, f2186, f2187;
mul.f32 f2185, f1699, f1514;
mul.f32 f1708, f1618, f1701;
fma.rn.f32 f1709, f1619, f1699, f1708;
mul.f32 f1711, f1709, f1536;
mul.f32 f1712, f1707, f1536;
mul.f32 f1714, f1619, f1709;
mul.f32 f2184, f1618, f1707;
sub.f32 f1715, f2184, f1714;
mul.f32 f2183, f1707, f1530;
mul.f32 f1716, f1618, f1709;
fma.rn.f32 f1717, f1619, f1707, f1716;
mul.f32 f1719, f1717, f1552;
mul.f32 f1720, f1715, f1552;
mul.f32 f1722, f1619, f1717;
mul.f32 f2182, f1618, f1715;
sub.f32 f1723, f2182, f1722;
mul.f32 f2181, f1715, f1546;
mul.f32 f1724, f1618, f1717;
fma.rn.f32 f1725, f1619, f1715, f1724;
mul.f32 f1727, f1725, f1568;
mul.f32 f1728, f1723, f1568;
mul.f32 f2179, f1618, f1723;
mul.f32 f2180, f1619, f1725;
sub.f32 f1731, f2179, f2180;
mul.f32 f2178, f1723, f1562;
mul.f32 f1732, f1618, f1725;
fma.rn.f32 f1733, f1619, f1723, f1732;
mul.f32 f1735, f1733, f1584;
mul.f32 f1736, f1731, f1584;
mul.f32 f1738, f1619, f1733;
mul.f32 f2177, f1618, f1731;
sub.f32 f1739, f2177, f1738;
mul.f32 f2176, f1731, f1578;
mul.f32 f1740, f1618, f1733;
fma.rn.f32 f1741, f1619, f1731, f1740;
mul.f32 f1743, f1741, f1600;
mul.f32 f1744, f1739, f1600;
mul.f32 f1746, f1619, f1741;
mul.f32 f2175, f1618, f1739;
sub.f32 f1747, f2175, f1746;
mul.f32 f2174, f1739, f1594;
mul.f32 f1748, f1618, f1741;
fma.rn.f32 f1749, f1619, f1739, f1748;
mul.f32 f1751, f1749, f1616;
mul.f32 f1752, f1747, f1616;
mul.f32 f2172, f1618, f1747;
mul.f32 f2173, f1619, f1749;
sub.f32 f1755, f2172, f2173;
mul.f32 f2171, f1747, f1610;
mul.f32 f1756, f1618, f1749;
fma.rn.f32 f1757, f1619, f1747, f1756;
mul.f32 f1759, f1757, f1489;
mul.f32 f1760, f1755, f1489;
mul.f32 f1762, f1619, f1757;
mul.f32 f2170, f1618, f1755;
sub.f32 f1763, f2170, f1762;
mul.f32 f2169, f1755, f1483;
mul.f32 f1764, f1618, f1757;
fma.rn.f32 f1765, f1619, f1755, f1764;
mul.f32 f1767, f1765, f1505;
mul.f32 f1768, f1763, f1505;
mul.f32 f2167, f1618, f1763;
mul.f32 f2168, f1619, f1765;
sub.f32 f1771, f2167, f2168;
mul.f32 f2166, f1763, f1499;
mul.f32 f1772, f1618, f1765;
fma.rn.f32 f1773, f1619, f1763, f1772;
mul.f32 f1775, f1773, f1521;
mul.f32 f1776, f1771, f1521;
mul.f32 f1778, f1619, f1773;
mul.f32 f2165, f1618, f1771;
sub.f32 f1779, f2165, f1778;
mul.f32 f2164, f1771, f1515;
mul.f32 f1780, f1618, f1773;
fma.rn.f32 f1781, f1619, f1771, f1780;
mul.f32 f1783, f1781, f1537;
mul.f32 f1784, f1779, f1537;
mul.f32 f1786, f1619, f1781;
mul.f32 f2163, f1618, f1779;
sub.f32 f1787, f2163, f1786;
mul.f32 f2162, f1779, f1531;
mul.f32 f1788, f1618, f1781;
fma.rn.f32 f1789, f1619, f1779, f1788;
mul.f32 f1791, f1789, f1553;
mul.f32 f1792, f1787, f1553;
mul.f32 f2160, f1618, f1787;
mul.f32 f2161, f1619, f1789;
sub.f32 f1795, f2160, f2161;
mul.f32 f2159, f1787, f1547;
mul.f32 f1796, f1618, f1789;
fma.rn.f32 f1797, f1619, f1787, f1796;
mul.f32 f1799, f1797, f1569;
mul.f32 f1800, f1795, f1569;
mul.f32 f1802, f1619, f1797;
mul.f32 f2158, f1618, f1795;
sub.f32 f1803, f2158, f1802;
mul.f32 f2157, f1795, f1563;
mul.f32 f1804, f1618, f1797;
fma.rn.f32 f1805, f1619, f1795, f1804;
mul.f32 f1807, f1805, f1585;
mul.f32 f1808, f1803, f1585;
mul.f32 f1810, f1619, f1805;
mul.f32 f2156, f1618, f1803;
sub.f32 f1811, f2156, f1810;
mul.f32 f2155, f1803, f1579;
mul.f32 f1812, f1618, f1805;
fma.rn.f32 f1813, f1619, f1803, f1812;
mul.f32 f1815, f1813, f1601;
mul.f32 f1816, f1811, f1601;
mul.f32 f2153, f1618, f1811;
mul.f32 f2154, f1619, f1813;
sub.f32 f1819, f2153, f2154;
mul.f32 f2152, f1618, f1491;
mul.f32 f1820, f1618, f1813;
mul.f32 f2151, f1811, f1595;
fma.rn.f32 f1821, f1619, f1811, f1820;
mul.f32 f1822, f1819, f1611;
mul.f32 f1823, f1821, f1617;
mul.f32 f1824, f1819, f1617;
shl.b32 r18, r17, 3;
add.s32 r19, r8, r18;
barrier.sync 0;
mad.lo.s32 r20, r15, 5832, r19;
add.f32 f1825, f2294, f2227;
add.f32 f1826, f1117, f1476;
st.shared.v2.f32 [r20], {f1826, f1825};
fma.rn.f32 f1827, f1619, f1491, f1624;
sub.f32 f1828, f2152, f1623;
st.shared.v2.f32 [r20+216], {f1828, f1827};
fma.rn.f32 f1829, f1629, f1507, f1632;
sub.f32 f1830, f2207, f1631;
st.shared.v2.f32 [r20+432], {f1830, f1829};
fma.rn.f32 f1831, f1637, f1523, f1640;
sub.f32 f1832, f2204, f1639;
st.shared.v2.f32 [r20+648], {f1832, f1831};
fma.rn.f32 f1833, f1645, f1539, f1648;
sub.f32 f1834, f2202, f1647;
st.shared.v2.f32 [r20+864], {f1834, f1833};
fma.rn.f32 f1835, f1653, f1555, f1656;
sub.f32 f1836, f2200, f1655;
st.shared.v2.f32 [r20+1080], {f1836, f1835};
sub.f32 f1837, f2197, f1663;
fma.rn.f32 f1838, f1661, f1571, f1664;
st.shared.v2.f32 [r20+1296], {f1837, f1838};
fma.rn.f32 f1839, f1669, f1587, f1672;
sub.f32 f1840, f2195, f1671;
st.shared.v2.f32 [r20+1512], {f1840, f1839};
sub.f32 f1841, f2193, f1679;
fma.rn.f32 f1842, f1677, f1603, f1680;
st.shared.v2.f32 [r20+1728], {f1841, f1842};
fma.rn.f32 f1843, f1685, f1482, f1688;
sub.f32 f1844, f2190, f1687;
st.shared.v2.f32 [r20+1944], {f1844, f1843};
fma.rn.f32 f1845, f1693, f1498, f1696;
sub.f32 f1846, f2188, f1695;
st.shared.v2.f32 [r20+2160], {f1846, f1845};
fma.rn.f32 f1847, f1701, f1514, f1704;
sub.f32 f1848, f2185, f1703;
st.shared.v2.f32 [r20+2376], {f1848, f1847};
fma.rn.f32 f1849, f1709, f1530, f1712;
sub.f32 f1850, f2183, f1711;
st.shared.v2.f32 [r20+2592], {f1850, f1849};
fma.rn.f32 f1851, f1717, f1546, f1720;
sub.f32 f1852, f2181, f1719;
st.shared.v2.f32 [r20+2808], {f1852, f1851};
fma.rn.f32 f1853, f1725, f1562, f1728;
sub.f32 f1854, f2178, f1727;
st.shared.v2.f32 [r20+3024], {f1854, f1853};
fma.rn.f32 f1855, f1733, f1578, f1736;
sub.f32 f1856, f2176, f1735;
st.shared.v2.f32 [r20+3240], {f1856, f1855};
fma.rn.f32 f1857, f1741, f1594, f1744;
sub.f32 f1858, f2174, f1743;
st.shared.v2.f32 [r20+3456], {f1858, f1857};
fma.rn.f32 f1859, f1749, f1610, f1752;
sub.f32 f1860, f2171, f1751;
st.shared.v2.f32 [r20+3672], {f1860, f1859};
fma.rn.f32 f1861, f1757, f1483, f1760;
sub.f32 f1862, f2169, f1759;
st.shared.v2.f32 [r20+3888], {f1862, f1861};
fma.rn.f32 f1863, f1765, f1499, f1768;
sub.f32 f1864, f2166, f1767;
st.shared.v2.f32 [r20+4104], {f1864, f1863};
fma.rn.f32 f1865, f1773, f1515, f1776;
sub.f32 f1866, f2164, f1775;
st.shared.v2.f32 [r20+4320], {f1866, f1865};
fma.rn.f32 f1867, f1781, f1531, f1784;
sub.f32 f1868, f2162, f1783;
st.shared.v2.f32 [r20+4536], {f1868, f1867};
fma.rn.f32 f1869, f1789, f1547, f1792;
sub.f32 f1870, f2159, f1791;
st.shared.v2.f32 [r20+4752], {f1870, f1869};
fma.rn.f32 f1871, f1797, f1563, f1800;
sub.f32 f1872, f2157, f1799;
st.shared.v2.f32 [r20+4968], {f1872, f1871};
fma.rn.f32 f1873, f1805, f1579, f1808;
sub.f32 f1874, f2155, f1807;
st.shared.v2.f32 [r20+5184], {f1874, f1873};
fma.rn.f32 f1875, f1813, f1595, f1816;
sub.f32 f1876, f2151, f1815;
st.shared.v2.f32 [r20+5400], {f1876, f1875};
fma.rn.f32 f1877, f1821, f1611, f1824;
sub.f32 f1878, f1822, f1823;
st.shared.v2.f32 [r20+5616], {f1878, f1877};
barrier.sync 0;
ld.shared.v2.f32 {f1879, f1880}, [r10];
ld.shared.v2.f32 {f1883, f1884}, [r10+648];
ld.shared.v2.f32 {f1887, f1888}, [r10+1296];
ld.shared.v2.f32 {f1891, f1892}, [r10+1944];
ld.shared.v2.f32 {f1895, f1896}, [r10+2592];
ld.shared.v2.f32 {f1899, f1900}, [r10+3240];
ld.shared.v2.f32 {f1903, f1904}, [r10+3888];
ld.shared.v2.f32 {f1907, f1908}, [r10+4536];
ld.shared.v2.f32 {f1911, f1912}, [r10+5184];
ld.shared.v2.f32 {f1915, f1916}, [r10+5832];
ld.shared.v2.f32 {f1919, f1920}, [r10+6480];
ld.shared.v2.f32 {f1923, f1924}, [r10+7128];
ld.shared.v2.f32 {f1927, f1928}, [r10+7776];
ld.shared.v2.f32 {f1931, f1932}, [r10+8424];
ld.shared.v2.f32 {f1935, f1936}, [r10+9072];
ld.shared.v2.f32 {f1939, f1940}, [r10+9720];
ld.shared.v2.f32 {f1943, f1944}, [r10+10368];
ld.shared.v2.f32 {f1947, f1948}, [r10+11016];
ld.shared.v2.f32 {f1951, f1952}, [r10+11664];
ld.shared.v2.f32 {f1955, f1956}, [r10+12312];
ld.shared.v2.f32 {f1959, f1960}, [r10+12960];
ld.shared.v2.f32 {f1963, f1964}, [r10+13608];
ld.shared.v2.f32 {f1967, f1968}, [r10+14256];
ld.shared.v2.f32 {f1971, f1972}, [r10+14904];
ld.shared.v2.f32 {f1975, f1976}, [r10+15552];
ld.shared.v2.f32 {f1979, f1980}, [r10+16200];
ld.shared.v2.f32 {f1983, f1984}, [r10+16848];
add.f32 f1987, f1915, f1951;
mul.f32 f1989, f1987, 0f3F000000;
sub.f32 f1990, f1879, f1989;
add.f32 f2150, f1916, f1952;
sub.f32 f1991, f1916, f1952;
mul.f32 f1992, f1991, 0f3F5DB3D7;
mul.f32 f1993, f2150, 0f3F000000;
sub.f32 f1994, f1880, f1993;
sub.f32 f1995, f1915, f1951;
mul.f32 f1996, f1995, 0f3F5DB3D7;
add.f32 f1997, f1919, f1955;
mul.f32 f1999, f1997, 0f3F000000;
sub.f32 f2000, f1883, f1999;
add.f32 f2149, f1920, f1956;
sub.f32 f2001, f1920, f1956;
mul.f32 f2002, f2001, 0f3F5DB3D7;
mul.f32 f2003, f2149, 0f3F000000;
sub.f32 f2004, f1884, f2003;
sub.f32 f2005, f1919, f1955;
mul.f32 f2006, f2005, 0f3F5DB3D7;
add.f32 f2007, f1923, f1959;
mul.f32 f2009, f2007, 0f3F000000;
sub.f32 f2010, f1887, f2009;
add.f32 f2148, f1924, f1960;
sub.f32 f2011, f1924, f1960;
mul.f32 f2012, f2011, 0f3F5DB3D7;
mul.f32 f2013, f2148, 0f3F000000;
sub.f32 f2014, f1888, f2013;
sub.f32 f2015, f1923, f1959;
mul.f32 f2016, f2015, 0f3F5DB3D7;
add.f32 f2017, f1927, f1963;
mul.f32 f2019, f2017, 0f3F000000;
sub.f32 f2020, f1891, f2019;
add.f32 f2147, f1928, f1964;
sub.f32 f2021, f1928, f1964;
mul.f32 f2022, f2021, 0f3F5DB3D7;
mul.f32 f2023, f2147, 0f3F000000;
sub.f32 f2024, f1892, f2023;
sub.f32 f2025, f1927, f1963;
mul.f32 f2026, f2025, 0f3F5DB3D7;
add.f32 f2027, f1931, f1967;
mul.f32 f2029, f2027, 0f3F000000;
sub.f32 f2030, f1895, f2029;
add.f32 f2146, f1932, f1968;
sub.f32 f2031, f1932, f1968;
mul.f32 f2032, f2031, 0f3F5DB3D7;
mul.f32 f2033, f2146, 0f3F000000;
sub.f32 f2034, f1896, f2033;
sub.f32 f2035, f1931, f1967;
mul.f32 f2036, f2035, 0f3F5DB3D7;
add.f32 f2037, f1935, f1971;
mul.f32 f2039, f2037, 0f3F000000;
sub.f32 f2040, f1899, f2039;
add.f32 f2145, f1936, f1972;
sub.f32 f2041, f1936, f1972;
mul.f32 f2042, f2041, 0f3F5DB3D7;
mul.f32 f2043, f2145, 0f3F000000;
sub.f32 f2044, f1900, f2043;
sub.f32 f2045, f1935, f1971;
mul.f32 f2046, f2045, 0f3F5DB3D7;
add.f32 f2047, f1939, f1975;
mul.f32 f2049, f2047, 0f3F000000;
sub.f32 f2050, f1903, f2049;
add.f32 f2144, f1940, f1976;
sub.f32 f2051, f1940, f1976;
mul.f32 f2052, f2051, 0f3F5DB3D7;
mul.f32 f2053, f2144, 0f3F000000;
sub.f32 f2054, f1904, f2053;
sub.f32 f2055, f1939, f1975;
mul.f32 f2056, f2055, 0f3F5DB3D7;
add.f32 f2057, f1943, f1979;
mul.f32 f2059, f2057, 0f3F000000;
sub.f32 f2060, f1907, f2059;
add.f32 f2143, f1944, f1980;
sub.f32 f2061, f1944, f1980;
mul.f32 f2062, f2061, 0f3F5DB3D7;
mul.f32 f2063, f2143, 0f3F000000;
sub.f32 f2064, f1908, f2063;
sub.f32 f2065, f1943, f1979;
mul.f32 f2066, f2065, 0f3F5DB3D7;
add.f32 f2067, f1947, f1983;
mul.f32 f2069, f2067, 0f3F000000;
sub.f32 f2070, f1911, f2069;
add.f32 f2142, f1948, f1984;
sub.f32 f2071, f1948, f1984;
mul.f32 f2072, f2071, 0f3F5DB3D7;
mul.f32 f2073, f2142, 0f3F000000;
sub.f32 f2074, f1912, f2073;
sub.f32 f2075, f1947, f1983;
mul.f32 f2076, f2075, 0f3F5DB3D7;
add.f32 %1, f1880, f2150;
add.f32 %0, f1879, f1987;
add.f32 %3, f1884, f2149;
add.f32 %2, f1883, f1997;
add.f32 %5, f1888, f2148;
add.f32 %4, f1887, f2007;
add.f32 %7, f1892, f2147;
add.f32 %6, f1891, f2017;
add.f32 %9, f1896, f2146;
add.f32 %8, f1895, f2027;
add.f32 %11, f1900, f2145;
add.f32 %10, f1899, f2037;
add.f32 %13, f1904, f2144;
add.f32 %12, f1903, f2047;
add.f32 %15, f1908, f2143;
add.f32 %14, f1907, f2057;
add.f32 %17, f1912, f2142;
add.f32 %16, f1911, f2067;
add.f32 %18, f1992, f1990;
sub.f32 %19, f1994, f1996;
add.f32 %20, f2002, f2000;
sub.f32 %21, f2004, f2006;
add.f32 %22, f2012, f2010;
sub.f32 %23, f2014, f2016;
add.f32 %24, f2022, f2020;
sub.f32 %25, f2024, f2026;
sub.f32 %27, f2034, f2036;
add.f32 %26, f2032, f2030;
sub.f32 %29, f2044, f2046;
add.f32 %28, f2042, f2040;
add.f32 %30, f2052, f2050;
sub.f32 %31, f2054, f2056;
add.f32 %32, f2062, f2060;
sub.f32 %33, f2064, f2066;
add.f32 %34, f2072, f2070;
sub.f32 %35, f2074, f2076;
add.f32 %37, f1996, f1994;
sub.f32 %36, f1990, f1992;
add.f32 %39, f2006, f2004;
sub.f32 %38, f2000, f2002;
add.f32 %41, f2016, f2014;
sub.f32 %40, f2010, f2012;
add.f32 %43, f2026, f2024;
sub.f32 %42, f2020, f2022;
add.f32 %45, f2036, f2034;
sub.f32 %44, f2030, f2032;
add.f32 %47, f2046, f2044;
sub.f32 %46, f2040, f2042;
add.f32 %49, f2056, f2054;
sub.f32 %48, f2050, f2052;
add.f32 %51, f2066, f2064;
sub.f32 %50, f2060, f2062;
add.f32 %53, f2076, f2074;
sub.f32 %52, f2070, f2072;
})"
     : "=f"(rmem[0].x), "=f"(rmem[0].y), "=f"(rmem[1].x), "=f"(rmem[1].y), "=f"(rmem[2].x), "=f"(rmem[2].y), "=f"(rmem[3].x), "=f"(rmem[3].y), "=f"(rmem[4].x), "=f"(rmem[4].y), "=f"(rmem[5].x), "=f"(rmem[5].y), "=f"(rmem[6].x), "=f"(rmem[6].y), "=f"(rmem[7].x), "=f"(rmem[7].y), "=f"(rmem[8].x), "=f"(rmem[8].y), "=f"(rmem[9].x), "=f"(rmem[9].y), "=f"(rmem[10].x), "=f"(rmem[10].y), "=f"(rmem[11].x), "=f"(rmem[11].y), "=f"(rmem[12].x), "=f"(rmem[12].y), "=f"(rmem[13].x), "=f"(rmem[13].y), "=f"(rmem[14].x), "=f"(rmem[14].y), "=f"(rmem[15].x), "=f"(rmem[15].y), "=f"(rmem[16].x), "=f"(rmem[16].y), "=f"(rmem[17].x), "=f"(rmem[17].y), "=f"(rmem[18].x), "=f"(rmem[18].y), "=f"(rmem[19].x), "=f"(rmem[19].y), "=f"(rmem[20].x), "=f"(rmem[20].y), "=f"(rmem[21].x), "=f"(rmem[21].y), "=f"(rmem[22].x), "=f"(rmem[22].y), "=f"(rmem[23].x), "=f"(rmem[23].y), "=f"(rmem[24].x), "=f"(rmem[24].y), "=f"(rmem[25].x), "=f"(rmem[25].y), "=f"(rmem[26].x), "=f"(rmem[26].y): "r"(smem), "l"(lut_sp_27_2187), "l"(lut_sp_27_81), "f"(rmem[0].x), "f"(rmem[0].y), "f"(rmem[1].x), "f"(rmem[1].y), "f"(rmem[2].x), "f"(rmem[2].y), "f"(rmem[3].x), "f"(rmem[3].y), "f"(rmem[4].x), "f"(rmem[4].y), "f"(rmem[5].x), "f"(rmem[5].y), "f"(rmem[6].x), "f"(rmem[6].y), "f"(rmem[7].x), "f"(rmem[7].y), "f"(rmem[8].x), "f"(rmem[8].y), "f"(rmem[9].x), "f"(rmem[9].y), "f"(rmem[10].x), "f"(rmem[10].y), "f"(rmem[11].x), "f"(rmem[11].y), "f"(rmem[12].x), "f"(rmem[12].y), "f"(rmem[13].x), "f"(rmem[13].y), "f"(rmem[14].x), "f"(rmem[14].y), "f"(rmem[15].x), "f"(rmem[15].y), "f"(rmem[16].x), "f"(rmem[16].y), "f"(rmem[17].x), "f"(rmem[17].y), "f"(rmem[18].x), "f"(rmem[18].y), "f"(rmem[19].x), "f"(rmem[19].y), "f"(rmem[20].x), "f"(rmem[20].y), "f"(rmem[21].x), "f"(rmem[21].y), "f"(rmem[22].x), "f"(rmem[22].y), "f"(rmem[23].x), "f"(rmem[23].y), "f"(rmem[24].x), "f"(rmem[24].y), "f"(rmem[25].x), "f"(rmem[25].y), "f"(rmem[26].x), "f"(rmem[26].y), "f"(rmem[10].y), "f"(rmem[19].y), "f"(rmem[1].y), "f"(rmem[22].y), "f"(rmem[13].y), 
"f"(rmem[4].y), "f"(rmem[16].y), "f"(rmem[25].y), "f"(rmem[7].y), "f"(rmem[11].y), "f"(rmem[20].y), "f"(rmem[2].y), "f"(rmem[23].y), "f"(rmem[14].y), "f"(rmem[5].y), "f"(rmem[17].y), "f"(rmem[8].y));
};




// cufftdx_private_function<144, float, 1>
//
// Purpose
//   One inline-PTX block that transforms the nine complex<float> values held in
//   rmem[0..8], exchanging intermediate results with other threads through
//   shared memory. Judging by the file's include guard
//   (CUFFTDX_FFT_2187_FP32_FWD_PTX_HPP) this is one implementation of a
//   2187-point fp32 forward FFT; the structure visible in the body (three
//   radix-9 passes followed by one radix-3 pass, 9*9*9*3 = 2187, with
//   243 threads x 9 values per transform) is consistent with that.
//
// Parameters
//   rmem  Nine complex values. Every rmem[k].x / rmem[k].y (k = 0..8) is passed
//         as an input operand and is overwritten as an output operand.
//   smem  32-bit byte address used as the base of every st.shared / ld.shared
//         access in the body.
//
// Thread / shared-memory addressing computed by the body
//   q    = tid.x / 243,  t = tid.x % 243   (division by multiply + shift)
//   base = smem + (tid.y + q) * 17496      (17496 = 2187 * 8 bytes)
//   The highest byte touched relative to base is 242*8 + 15552 + 8 = 17496, so
//   each (tid.y, q) pair uses its own 17496-byte window starting at base.
//
// Pass structure
//   1. Radix-9 butterfly on the register inputs, built from radix-3 butterflies
//      on (0,3,6), (1,4,7), (2,5,8), fixed rotations, and a second radix-3.
//      The result is multiplied by successive powers w^1..w^8 of the complex
//      value loaded from lut_sp_9_2187[t] and stored contiguously at
//      base + t*72.
//   2. Reload nine values from base + t*8 + j*1944 (j = 0..8), radix-9 again,
//      multiply by powers of lut_sp_9_243[t/9], store at
//      base + (t/9)*648 + (t%9)*8 with a 72-byte stride.
//   3. Reload with the same 1944-byte stride, radix-9 again, multiply by powers
//      of lut_sp_9_27[t/81], store at base + (t/81)*5832 + (t%81)*8 with a
//      648-byte stride.
//   4. Reload with the same 1944-byte stride and apply a final radix-3 butterfly
//      on (0,3,6), (1,4,7), (2,5,8). rmem[0..2] receive the three sums,
//      rmem[3..5] and rmem[6..8] the two remaining radix-3 outputs.
//
// Floating-point immediates (decoded from their IEEE-754 hex encodings)
//   0f3F000000 =  0.5         0f3F5DB3D7 ~  0.8660254  (sqrt(3)/2)
//   0f3F441B7D ~  0.7660444   0fBF248DBB ~ -0.6427876  (cos, -sin of 2*pi/9)
//   0f3E31D0D4 ~  0.1736482   0fBF7C1C5C ~ -0.9848078  (cos, -sin of 4*pi/9)
//   0fBF708FB2 ~ -0.9396926   0fBEAF1D44 ~ -0.3420201  (cos, -sin of 8*pi/9)
//
// Synchronization
//   The body executes `barrier.sync 0` six times, once before and once after
//   each of the three shared-memory store phases, with no branches around
//   them. Every thread that participates in barrier 0 therefore has to execute
//   this function.
//
// NOTE(review): the asm statement declares no clobber list (no "memory"
// clobber) even though it stores to and loads from shared memory. Confirm that
// callers do not depend on the compiler ordering their own shared-memory
// accesses relative to this statement.
// NOTE(review): lut_sp_9_2187, lut_sp_9_243 and lut_sp_9_27 are defined outside
// this block; they are read with ld.global.v2.f32 using 8-byte elements, so
// they are presumably arrays of (re, im) float pairs in global memory. Verify
// against their definitions.
template<> __forceinline__ __device__ void cufftdx_private_function<144, float, 1>(cufftdx::detail::complex<float> *rmem, unsigned smem){

asm volatile (R"({
.reg .f32 f<778>;
.reg .b32 r<24>;
.reg .b64 rd<17>;
mov.u32 r1, %tid.y;
mov.u32 r2, %18;
mad.lo.s32 r3, r1, 17496, r2;
add.f32 f37, %30, %38;
add.f32 f38, %22, f37;
add.f32 f39, %31, %39;
add.f32 f40, %23, f39;
mul.f32 f41, f37, 0f3F000000;
sub.f32 f42, %22, f41;
sub.f32 f43, %31, %39;
mul.f32 f44, f43, 0f3F5DB3D7;
add.f32 f45, f44, f42;
sub.f32 f46, f42, f44;
mul.f32 f47, f39, 0f3F000000;
sub.f32 f48, %23, f47;
sub.f32 f49, %30, %38;
mul.f32 f50, f49, 0f3F5DB3D7;
sub.f32 f51, f48, f50;
add.f32 f52, f50, f48;
add.f32 f53, %32, %40;
add.f32 f54, %24, f53;
add.f32 f55, %34, %42;
add.f32 f56, %26, f55;
mul.f32 f57, f53, 0f3F000000;
sub.f32 f58, %24, f57;
sub.f32 f59, %34, %42;
mul.f32 f60, f59, 0f3F5DB3D7;
add.f32 f61, f60, f58;
sub.f32 f62, f58, f60;
mul.f32 f63, f55, 0f3F000000;
sub.f32 f64, %26, f63;
sub.f32 f65, %32, %40;
mul.f32 f66, f65, 0f3F5DB3D7;
sub.f32 f67, f64, f66;
add.f32 f68, f66, f64;
add.f32 f69, %35, %43;
add.f32 f70, %27, f69;
add.f32 f71, %37, %44;
add.f32 f72, %29, f71;
mul.f32 f73, f69, 0f3F000000;
sub.f32 f74, %27, f73;
sub.f32 f75, %37, %44;
mul.f32 f76, f75, 0f3F5DB3D7;
add.f32 f77, f76, f74;
sub.f32 f78, f74, f76;
mul.f32 f79, f71, 0f3F000000;
sub.f32 f80, %29, f79;
sub.f32 f81, %35, %43;
mul.f32 f82, f81, 0f3F5DB3D7;
sub.f32 f83, f80, f82;
add.f32 f84, f82, f80;
mov.u32 r4, %tid.x;
mul.f32 f85, f61, 0f3F441B7D;
mul.f32 f86, f67, 0fBF248DBB;
sub.f32 f87, f85, f86;
mul.f32 f88, f67, 0f3F441B7D;
fma.rn.f32 f89, f61, 0fBF248DBB, f88;
mul.f32 f90, f77, 0f3E31D0D4;
mul.f32 f91, f83, 0fBF7C1C5C;
sub.f32 f92, f90, f91;
mul.f32 f93, f83, 0f3E31D0D4;
fma.rn.f32 f94, f77, 0fBF7C1C5C, f93;
mul.f32 f95, f62, 0f3E31D0D4;
mul.f32 f96, f68, 0fBF7C1C5C;
sub.f32 f97, f95, f96;
mul.f32 f98, f68, 0f3E31D0D4;
fma.rn.f32 f99, f62, 0fBF7C1C5C, f98;
mul.f32 f100, f78, 0fBF708FB2;
mul.f32 f101, f84, 0fBEAF1D44;
sub.f32 f102, f100, f101;
mul.f32 f103, f84, 0fBF708FB2;
fma.rn.f32 f104, f78, 0fBEAF1D44, f103;
add.f32 f105, f54, f70;
add.f32 f106, f56, f72;
mul.f32 f107, f105, 0f3F000000;
sub.f32 f108, f38, f107;
sub.f32 f109, f56, f72;
mul.f32 f110, f109, 0f3F5DB3D7;
add.f32 f111, f110, f108;
sub.f32 f112, f108, f110;
mul.f32 f113, f106, 0f3F000000;
sub.f32 f114, f40, f113;
sub.f32 f115, f54, f70;
mul.f32 f116, f115, 0f3F5DB3D7;
sub.f32 f117, f114, f116;
add.f32 f118, f116, f114;
add.f32 f119, f87, f92;
add.f32 f120, f45, f119;
add.f32 f121, f89, f94;
add.f32 f122, f51, f121;
mul.f32 f123, f119, 0f3F000000;
sub.f32 f124, f45, f123;
sub.f32 f125, f89, f94;
mul.f32 f126, f125, 0f3F5DB3D7;
add.f32 f127, f126, f124;
sub.f32 f128, f124, f126;
mul.f32 f129, f121, 0f3F000000;
sub.f32 f130, f51, f129;
sub.f32 f131, f87, f92;
mul.f32 f132, f131, 0f3F5DB3D7;
sub.f32 f133, f130, f132;
add.f32 f134, f132, f130;
add.f32 f135, f97, f102;
add.f32 f136, f46, f135;
add.f32 f137, f99, f104;
add.f32 f138, f52, f137;
mul.f32 f139, f135, 0f3F000000;
sub.f32 f140, f46, f139;
sub.f32 f141, f99, f104;
mul.f32 f142, f141, 0f3F5DB3D7;
add.f32 f143, f142, f140;
sub.f32 f144, f140, f142;
mul.f32 f145, f137, 0f3F000000;
sub.f32 f146, f52, f145;
sub.f32 f147, f97, f102;
mul.f32 f148, f147, 0f3F5DB3D7;
sub.f32 f149, f146, f148;
add.f32 f150, f148, f146;
mul.wide.u32 rd2, r4, -2032597691;
shr.u64 rd3, rd2, 39;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 243;
sub.s32 r7, r4, r6;
mad.lo.s32 r8, r5, 17496, r3;
mul.wide.u32 rd4, r7, 8;
mov.u64 rd5, %19;
add.s64 rd6, rd5, rd4;
ld.global.v2.f32 {f151, f152}, [rd6];
mul.f32 f155, f151, f120;
mul.f32 f156, f152, f122;
mul.f32 f157, f151, f122;
mul.f32 f158, f151, f151;
mul.f32 f159, f152, f152;
sub.f32 f160, f158, f159;
mul.f32 f161, f152, f151;
fma.rn.f32 f162, f152, f151, f161;
mul.f32 f163, f160, f136;
mul.f32 f164, f162, f138;
mul.f32 f165, f160, f138;
mul.f32 f166, f151, f160;
mul.f32 f167, f152, f162;
sub.f32 f168, f166, f167;
mul.f32 f169, f151, f162;
fma.rn.f32 f170, f152, f160, f169;
mul.f32 f171, f168, f111;
mul.f32 f172, f170, f117;
mul.f32 f173, f168, f117;
mul.f32 f174, f151, f168;
mul.f32 f175, f152, f170;
sub.f32 f176, f174, f175;
mul.f32 f177, f151, f170;
fma.rn.f32 f178, f152, f168, f177;
mul.f32 f179, f176, f127;
mul.f32 f180, f178, f133;
mul.f32 f181, f176, f133;
mul.f32 f182, f151, f176;
mul.f32 f183, f152, f178;
sub.f32 f184, f182, f183;
mul.f32 f185, f151, f178;
fma.rn.f32 f186, f152, f176, f185;
mul.f32 f187, f184, f143;
mul.f32 f188, f186, f149;
mul.f32 f189, f184, f149;
mul.f32 f190, f151, f184;
mul.f32 f191, f152, f186;
sub.f32 f192, f190, f191;
mul.f32 f193, f151, f186;
fma.rn.f32 f194, f152, f184, f193;
mul.f32 f195, f192, f112;
mul.f32 f196, f194, f118;
mul.f32 f197, f192, f118;
mul.f32 f198, f151, f192;
mul.f32 f199, f152, f194;
sub.f32 f200, f198, f199;
mul.f32 f201, f151, f194;
fma.rn.f32 f202, f152, f192, f201;
mul.f32 f203, f200, f128;
mul.f32 f204, f202, f134;
mul.f32 f205, f200, f134;
mul.f32 f206, f151, f200;
mul.f32 f207, f152, f202;
sub.f32 f208, f206, f207;
mul.f32 f209, f151, f202;
fma.rn.f32 f210, f152, f200, f209;
mul.f32 f211, f208, f144;
mul.f32 f212, f210, f150;
mul.f32 f213, f208, f150;
barrier.sync 0;
mad.lo.s32 r9, r7, 72, r8;
add.f32 f214, f40, f106;
add.f32 f215, f38, f105;
st.shared.v2.f32 [r9], {f215, f214};
fma.rn.f32 f216, f152, f120, f157;
sub.f32 f217, f155, f156;
st.shared.v2.f32 [r9+8], {f217, f216};
fma.rn.f32 f218, f162, f136, f165;
sub.f32 f219, f163, f164;
st.shared.v2.f32 [r9+16], {f219, f218};
sub.f32 f220, f171, f172;
fma.rn.f32 f221, f170, f111, f173;
st.shared.v2.f32 [r9+24], {f220, f221};
fma.rn.f32 f222, f178, f127, f181;
sub.f32 f223, f179, f180;
st.shared.v2.f32 [r9+32], {f223, f222};
sub.f32 f224, f187, f188;
fma.rn.f32 f225, f186, f143, f189;
st.shared.v2.f32 [r9+40], {f224, f225};
fma.rn.f32 f226, f194, f112, f197;
sub.f32 f227, f195, f196;
st.shared.v2.f32 [r9+48], {f227, f226};
fma.rn.f32 f228, f202, f128, f205;
sub.f32 f229, f203, f204;
st.shared.v2.f32 [r9+56], {f229, f228};
fma.rn.f32 f230, f210, f144, f213;
sub.f32 f231, f211, f212;
st.shared.v2.f32 [r9+64], {f231, f230};
barrier.sync 0;
shl.b32 r10, r7, 6;
sub.s32 r11, r9, r10;
ld.shared.v2.f32 {f232, f233}, [r11];
ld.shared.v2.f32 {f236, f237}, [r11+1944];
ld.shared.v2.f32 {f240, f241}, [r11+3888];
ld.shared.v2.f32 {f244, f245}, [r11+5832];
ld.shared.v2.f32 {f248, f249}, [r11+7776];
ld.shared.v2.f32 {f252, f253}, [r11+9720];
ld.shared.v2.f32 {f256, f257}, [r11+11664];
ld.shared.v2.f32 {f260, f261}, [r11+13608];
ld.shared.v2.f32 {f264, f265}, [r11+15552];
add.f32 f268, f244, f256;
add.f32 f269, f232, f268;
add.f32 f270, f245, f257;
add.f32 f271, f233, f270;
mul.f32 f272, f268, 0f3F000000;
sub.f32 f273, f232, f272;
sub.f32 f274, f245, f257;
mul.f32 f275, f274, 0f3F5DB3D7;
add.f32 f276, f275, f273;
sub.f32 f277, f273, f275;
mul.f32 f278, f270, 0f3F000000;
sub.f32 f279, f233, f278;
sub.f32 f280, f244, f256;
mul.f32 f281, f280, 0f3F5DB3D7;
sub.f32 f282, f279, f281;
add.f32 f283, f281, f279;
add.f32 f284, f248, f260;
add.f32 f285, f236, f284;
add.f32 f286, f249, f261;
add.f32 f287, f237, f286;
mul.f32 f288, f284, 0f3F000000;
sub.f32 f289, f236, f288;
sub.f32 f290, f249, f261;
mul.f32 f291, f290, 0f3F5DB3D7;
add.f32 f292, f291, f289;
sub.f32 f293, f289, f291;
mul.f32 f294, f286, 0f3F000000;
sub.f32 f295, f237, f294;
sub.f32 f296, f248, f260;
mul.f32 f297, f296, 0f3F5DB3D7;
sub.f32 f298, f295, f297;
add.f32 f299, f297, f295;
add.f32 f300, f252, f264;
add.f32 f301, f240, f300;
add.f32 f302, f253, f265;
add.f32 f303, f241, f302;
mul.f32 f304, f300, 0f3F000000;
sub.f32 f305, f240, f304;
sub.f32 f306, f253, f265;
mul.f32 f307, f306, 0f3F5DB3D7;
add.f32 f308, f307, f305;
sub.f32 f309, f305, f307;
mul.f32 f310, f302, 0f3F000000;
sub.f32 f311, f241, f310;
sub.f32 f312, f252, f264;
mul.f32 f313, f312, 0f3F5DB3D7;
sub.f32 f314, f311, f313;
add.f32 f315, f313, f311;
mul.f32 f316, f292, 0f3F441B7D;
mul.f32 f317, f298, 0fBF248DBB;
sub.f32 f318, f316, f317;
mul.f32 f319, f298, 0f3F441B7D;
fma.rn.f32 f320, f292, 0fBF248DBB, f319;
mul.f32 f321, f308, 0f3E31D0D4;
mul.f32 f322, f314, 0fBF7C1C5C;
sub.f32 f323, f321, f322;
mul.f32 f324, f314, 0f3E31D0D4;
fma.rn.f32 f325, f308, 0fBF7C1C5C, f324;
mul.f32 f326, f293, 0f3E31D0D4;
mul.f32 f327, f299, 0fBF7C1C5C;
sub.f32 f328, f326, f327;
mul.f32 f329, f299, 0f3E31D0D4;
fma.rn.f32 f330, f293, 0fBF7C1C5C, f329;
mul.f32 f331, f309, 0fBF708FB2;
mul.f32 f332, f315, 0fBEAF1D44;
sub.f32 f333, f331, f332;
mul.f32 f334, f315, 0fBF708FB2;
fma.rn.f32 f335, f309, 0fBEAF1D44, f334;
add.f32 f336, f285, f301;
add.f32 f337, f287, f303;
mul.f32 f338, f336, 0f3F000000;
sub.f32 f339, f269, f338;
sub.f32 f340, f287, f303;
mul.f32 f341, f340, 0f3F5DB3D7;
add.f32 f342, f341, f339;
sub.f32 f343, f339, f341;
mul.f32 f344, f337, 0f3F000000;
sub.f32 f345, f271, f344;
sub.f32 f346, f285, f301;
mul.f32 f347, f346, 0f3F5DB3D7;
sub.f32 f348, f345, f347;
add.f32 f349, f347, f345;
add.f32 f350, f318, f323;
add.f32 f351, f276, f350;
add.f32 f352, f320, f325;
add.f32 f353, f282, f352;
mul.f32 f354, f350, 0f3F000000;
sub.f32 f355, f276, f354;
sub.f32 f356, f320, f325;
mul.f32 f357, f356, 0f3F5DB3D7;
add.f32 f358, f357, f355;
sub.f32 f359, f355, f357;
mul.f32 f360, f352, 0f3F000000;
sub.f32 f361, f282, f360;
sub.f32 f362, f318, f323;
mul.f32 f363, f362, 0f3F5DB3D7;
sub.f32 f364, f361, f363;
add.f32 f365, f363, f361;
add.f32 f366, f328, f333;
add.f32 f367, f277, f366;
add.f32 f368, f330, f335;
add.f32 f369, f283, f368;
mul.f32 f370, f366, 0f3F000000;
sub.f32 f371, f277, f370;
sub.f32 f372, f330, f335;
mul.f32 f373, f372, 0f3F5DB3D7;
add.f32 f374, f373, f371;
sub.f32 f375, f371, f373;
mul.f32 f376, f368, 0f3F000000;
sub.f32 f377, f283, f376;
sub.f32 f378, f328, f333;
mul.f32 f379, f378, 0f3F5DB3D7;
sub.f32 f380, f377, f379;
add.f32 f381, f379, f377;
mul.wide.u32 rd7, r7, 954437177;
shr.u64 rd8, rd7, 33;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 9;
sub.s32 r14, r7, r13;
mul.wide.u32 rd9, r12, 8;
mov.u64 rd10, %20;
add.s64 rd11, rd10, rd9;
ld.global.v2.f32 {f382, f383}, [rd11];
mul.f32 f386, f382, f351;
mul.f32 f387, f383, f353;
mul.f32 f388, f382, f353;
mul.f32 f389, f382, f382;
mul.f32 f390, f383, f383;
sub.f32 f391, f389, f390;
mul.f32 f392, f383, f382;
fma.rn.f32 f393, f383, f382, f392;
mul.f32 f394, f391, f367;
mul.f32 f395, f393, f369;
mul.f32 f396, f391, f369;
mul.f32 f397, f382, f391;
mul.f32 f398, f383, f393;
sub.f32 f399, f397, f398;
mul.f32 f400, f382, f393;
fma.rn.f32 f401, f383, f391, f400;
mul.f32 f402, f399, f342;
mul.f32 f403, f401, f348;
mul.f32 f404, f399, f348;
mul.f32 f405, f382, f399;
mul.f32 f406, f383, f401;
sub.f32 f407, f405, f406;
mul.f32 f408, f382, f401;
fma.rn.f32 f409, f383, f399, f408;
mul.f32 f410, f407, f358;
mul.f32 f411, f409, f364;
mul.f32 f412, f407, f364;
mul.f32 f413, f382, f407;
mul.f32 f414, f383, f409;
sub.f32 f415, f413, f414;
mul.f32 f416, f382, f409;
fma.rn.f32 f417, f383, f407, f416;
mul.f32 f418, f415, f374;
mul.f32 f419, f417, f380;
mul.f32 f420, f415, f380;
mul.f32 f421, f382, f415;
mul.f32 f422, f383, f417;
sub.f32 f423, f421, f422;
mul.f32 f424, f382, f417;
fma.rn.f32 f425, f383, f415, f424;
mul.f32 f426, f423, f343;
mul.f32 f427, f425, f349;
mul.f32 f428, f423, f349;
mul.f32 f429, f382, f423;
mul.f32 f430, f383, f425;
sub.f32 f431, f429, f430;
mul.f32 f432, f382, f425;
fma.rn.f32 f433, f383, f423, f432;
mul.f32 f434, f431, f359;
mul.f32 f435, f433, f365;
mul.f32 f436, f431, f365;
mul.f32 f437, f382, f431;
mul.f32 f438, f383, f433;
sub.f32 f439, f437, f438;
mul.f32 f440, f382, f433;
fma.rn.f32 f441, f383, f431, f440;
mul.f32 f442, f439, f375;
mul.f32 f443, f441, f381;
mul.f32 f444, f439, f381;
shl.b32 r15, r14, 3;
add.s32 r16, r8, r15;
barrier.sync 0;
mad.lo.s32 r17, r12, 648, r16;
add.f32 f445, f271, f337;
add.f32 f446, f269, f336;
st.shared.v2.f32 [r17], {f446, f445};
fma.rn.f32 f447, f383, f351, f388;
sub.f32 f448, f386, f387;
st.shared.v2.f32 [r17+72], {f448, f447};
fma.rn.f32 f449, f393, f367, f396;
sub.f32 f450, f394, f395;
st.shared.v2.f32 [r17+144], {f450, f449};
fma.rn.f32 f451, f401, f342, f404;
sub.f32 f452, f402, f403;
st.shared.v2.f32 [r17+216], {f452, f451};
fma.rn.f32 f453, f409, f358, f412;
sub.f32 f454, f410, f411;
st.shared.v2.f32 [r17+288], {f454, f453};
fma.rn.f32 f455, f417, f374, f420;
sub.f32 f456, f418, f419;
st.shared.v2.f32 [r17+360], {f456, f455};
fma.rn.f32 f457, f425, f343, f428;
sub.f32 f458, f426, f427;
st.shared.v2.f32 [r17+432], {f458, f457};
sub.f32 f459, f434, f435;
fma.rn.f32 f460, f433, f359, f436;
st.shared.v2.f32 [r17+504], {f459, f460};
fma.rn.f32 f461, f441, f375, f444;
sub.f32 f462, f442, f443;
st.shared.v2.f32 [r17+576], {f462, f461};
barrier.sync 0;
ld.shared.v2.f32 {f463, f464}, [r11];
ld.shared.v2.f32 {f467, f468}, [r11+1944];
ld.shared.v2.f32 {f471, f472}, [r11+3888];
ld.shared.v2.f32 {f475, f476}, [r11+5832];
ld.shared.v2.f32 {f479, f480}, [r11+7776];
ld.shared.v2.f32 {f483, f484}, [r11+9720];
ld.shared.v2.f32 {f487, f488}, [r11+11664];
ld.shared.v2.f32 {f491, f492}, [r11+13608];
ld.shared.v2.f32 {f495, f496}, [r11+15552];
add.f32 f499, f475, f487;
add.f32 f500, f463, f499;
add.f32 f501, f476, f488;
add.f32 f502, f464, f501;
mul.f32 f503, f499, 0f3F000000;
sub.f32 f504, f463, f503;
sub.f32 f505, f476, f488;
mul.f32 f506, f505, 0f3F5DB3D7;
add.f32 f507, f506, f504;
sub.f32 f508, f504, f506;
mul.f32 f509, f501, 0f3F000000;
sub.f32 f510, f464, f509;
sub.f32 f511, f475, f487;
mul.f32 f512, f511, 0f3F5DB3D7;
sub.f32 f513, f510, f512;
add.f32 f514, f512, f510;
add.f32 f515, f479, f491;
add.f32 f516, f467, f515;
add.f32 f517, f480, f492;
add.f32 f518, f468, f517;
mul.f32 f519, f515, 0f3F000000;
sub.f32 f520, f467, f519;
sub.f32 f521, f480, f492;
mul.f32 f522, f521, 0f3F5DB3D7;
add.f32 f523, f522, f520;
sub.f32 f524, f520, f522;
mul.f32 f525, f517, 0f3F000000;
sub.f32 f526, f468, f525;
sub.f32 f527, f479, f491;
mul.f32 f528, f527, 0f3F5DB3D7;
sub.f32 f529, f526, f528;
add.f32 f530, f528, f526;
add.f32 f531, f483, f495;
add.f32 f532, f471, f531;
add.f32 f533, f484, f496;
add.f32 f534, f472, f533;
mul.f32 f535, f531, 0f3F000000;
sub.f32 f536, f471, f535;
sub.f32 f537, f484, f496;
mul.f32 f538, f537, 0f3F5DB3D7;
add.f32 f539, f538, f536;
sub.f32 f540, f536, f538;
mul.f32 f541, f533, 0f3F000000;
sub.f32 f542, f472, f541;
sub.f32 f543, f483, f495;
mul.f32 f544, f543, 0f3F5DB3D7;
sub.f32 f545, f542, f544;
add.f32 f546, f544, f542;
mul.f32 f547, f523, 0f3F441B7D;
mul.f32 f548, f529, 0fBF248DBB;
sub.f32 f549, f547, f548;
mul.f32 f550, f529, 0f3F441B7D;
fma.rn.f32 f551, f523, 0fBF248DBB, f550;
mul.f32 f552, f539, 0f3E31D0D4;
mul.f32 f553, f545, 0fBF7C1C5C;
sub.f32 f554, f552, f553;
mul.f32 f555, f545, 0f3E31D0D4;
fma.rn.f32 f556, f539, 0fBF7C1C5C, f555;
mul.f32 f557, f524, 0f3E31D0D4;
mul.f32 f558, f530, 0fBF7C1C5C;
sub.f32 f559, f557, f558;
mul.f32 f560, f530, 0f3E31D0D4;
fma.rn.f32 f561, f524, 0fBF7C1C5C, f560;
mul.f32 f562, f540, 0fBF708FB2;
mul.f32 f563, f546, 0fBEAF1D44;
sub.f32 f564, f562, f563;
mul.f32 f565, f546, 0fBF708FB2;
fma.rn.f32 f566, f540, 0fBEAF1D44, f565;
add.f32 f567, f516, f532;
add.f32 f568, f518, f534;
mul.f32 f569, f567, 0f3F000000;
sub.f32 f570, f500, f569;
sub.f32 f571, f518, f534;
mul.f32 f572, f571, 0f3F5DB3D7;
add.f32 f573, f572, f570;
sub.f32 f574, f570, f572;
mul.f32 f575, f568, 0f3F000000;
sub.f32 f576, f502, f575;
sub.f32 f577, f516, f532;
mul.f32 f578, f577, 0f3F5DB3D7;
sub.f32 f579, f576, f578;
add.f32 f580, f578, f576;
add.f32 f581, f549, f554;
add.f32 f582, f507, f581;
add.f32 f583, f551, f556;
add.f32 f584, f513, f583;
mul.f32 f585, f581, 0f3F000000;
sub.f32 f586, f507, f585;
sub.f32 f587, f551, f556;
mul.f32 f588, f587, 0f3F5DB3D7;
add.f32 f589, f588, f586;
sub.f32 f590, f586, f588;
mul.f32 f591, f583, 0f3F000000;
sub.f32 f592, f513, f591;
sub.f32 f593, f549, f554;
mul.f32 f594, f593, 0f3F5DB3D7;
sub.f32 f595, f592, f594;
add.f32 f596, f594, f592;
add.f32 f597, f559, f564;
add.f32 f598, f508, f597;
add.f32 f599, f561, f566;
add.f32 f600, f514, f599;
mul.f32 f601, f597, 0f3F000000;
sub.f32 f602, f508, f601;
sub.f32 f603, f561, f566;
mul.f32 f604, f603, 0f3F5DB3D7;
add.f32 f605, f604, f602;
sub.f32 f606, f602, f604;
mul.f32 f607, f599, 0f3F000000;
sub.f32 f608, f514, f607;
sub.f32 f609, f559, f564;
mul.f32 f610, f609, 0f3F5DB3D7;
sub.f32 f611, f608, f610;
add.f32 f612, f610, f608;
mul.wide.u32 rd12, r7, -901412889;
shr.u64 rd13, rd12, 38;
cvt.u32.u64 r18, rd13;
mul.lo.s32 r19, r18, 81;
sub.s32 r20, r7, r19;
mul.wide.u32 rd14, r18, 8;
mov.u64 rd15, %21;
add.s64 rd16, rd15, rd14;
ld.global.v2.f32 {f613, f614}, [rd16];
mul.f32 f617, f613, f582;
mul.f32 f618, f614, f584;
mul.f32 f619, f613, f584;
mul.f32 f620, f613, f613;
mul.f32 f621, f614, f614;
sub.f32 f622, f620, f621;
mul.f32 f623, f614, f613;
fma.rn.f32 f624, f614, f613, f623;
mul.f32 f625, f622, f598;
mul.f32 f626, f624, f600;
mul.f32 f627, f622, f600;
mul.f32 f628, f613, f622;
mul.f32 f629, f614, f624;
sub.f32 f630, f628, f629;
mul.f32 f631, f613, f624;
fma.rn.f32 f632, f614, f622, f631;
mul.f32 f633, f630, f573;
mul.f32 f634, f632, f579;
mul.f32 f635, f630, f579;
mul.f32 f636, f613, f630;
mul.f32 f637, f614, f632;
sub.f32 f638, f636, f637;
mul.f32 f639, f613, f632;
fma.rn.f32 f640, f614, f630, f639;
mul.f32 f641, f638, f589;
mul.f32 f642, f640, f595;
mul.f32 f643, f638, f595;
mul.f32 f644, f613, f638;
mul.f32 f645, f614, f640;
sub.f32 f646, f644, f645;
mul.f32 f647, f613, f640;
fma.rn.f32 f648, f614, f638, f647;
mul.f32 f649, f646, f605;
mul.f32 f650, f648, f611;
mul.f32 f651, f646, f611;
mul.f32 f652, f613, f646;
mul.f32 f653, f614, f648;
sub.f32 f654, f652, f653;
mul.f32 f655, f613, f648;
fma.rn.f32 f656, f614, f646, f655;
mul.f32 f657, f654, f574;
mul.f32 f658, f656, f580;
mul.f32 f659, f654, f580;
mul.f32 f660, f613, f654;
mul.f32 f661, f614, f656;
sub.f32 f662, f660, f661;
mul.f32 f663, f613, f656;
fma.rn.f32 f664, f614, f654, f663;
mul.f32 f665, f662, f590;
mul.f32 f666, f664, f596;
mul.f32 f667, f662, f596;
mul.f32 f668, f613, f662;
mul.f32 f669, f614, f664;
sub.f32 f670, f668, f669;
mul.f32 f671, f613, f664;
fma.rn.f32 f672, f614, f662, f671;
mul.f32 f673, f670, f606;
mul.f32 f674, f672, f612;
mul.f32 f675, f670, f612;
shl.b32 r21, r20, 3;
add.s32 r22, r8, r21;
barrier.sync 0;
mad.lo.s32 r23, r18, 5832, r22;
add.f32 f676, f502, f568;
add.f32 f677, f500, f567;
st.shared.v2.f32 [r23], {f677, f676};
fma.rn.f32 f678, f614, f582, f619;
sub.f32 f679, f617, f618;
st.shared.v2.f32 [r23+648], {f679, f678};
fma.rn.f32 f680, f624, f598, f627;
sub.f32 f681, f625, f626;
st.shared.v2.f32 [r23+1296], {f681, f680};
fma.rn.f32 f682, f632, f573, f635;
sub.f32 f683, f633, f634;
st.shared.v2.f32 [r23+1944], {f683, f682};
fma.rn.f32 f684, f640, f589, f643;
sub.f32 f685, f641, f642;
st.shared.v2.f32 [r23+2592], {f685, f684};
fma.rn.f32 f686, f648, f605, f651;
sub.f32 f687, f649, f650;
st.shared.v2.f32 [r23+3240], {f687, f686};
fma.rn.f32 f688, f656, f574, f659;
sub.f32 f689, f657, f658;
st.shared.v2.f32 [r23+3888], {f689, f688};
sub.f32 f690, f665, f666;
fma.rn.f32 f691, f664, f590, f667;
st.shared.v2.f32 [r23+4536], {f690, f691};
fma.rn.f32 f692, f672, f606, f675;
sub.f32 f693, f673, f674;
st.shared.v2.f32 [r23+5184], {f693, f692};
barrier.sync 0;
ld.shared.v2.f32 {f694, f695}, [r11];
ld.shared.v2.f32 {f698, f699}, [r11+1944];
ld.shared.v2.f32 {f702, f703}, [r11+3888];
ld.shared.v2.f32 {f706, f707}, [r11+5832];
ld.shared.v2.f32 {f710, f711}, [r11+7776];
ld.shared.v2.f32 {f714, f715}, [r11+9720];
ld.shared.v2.f32 {f718, f719}, [r11+11664];
ld.shared.v2.f32 {f722, f723}, [r11+13608];
ld.shared.v2.f32 {f726, f727}, [r11+15552];
add.f32 f730, f706, f718;
add.f32 f731, f707, f719;
mul.f32 f732, f730, 0f3F000000;
sub.f32 f733, f694, f732;
sub.f32 f734, f707, f719;
mul.f32 f735, f734, 0f3F5DB3D7;
mul.f32 f736, f731, 0f3F000000;
sub.f32 f737, f695, f736;
sub.f32 f738, f706, f718;
mul.f32 f739, f738, 0f3F5DB3D7;
add.f32 f740, f710, f722;
add.f32 f741, f711, f723;
mul.f32 f742, f740, 0f3F000000;
sub.f32 f743, f698, f742;
sub.f32 f744, f711, f723;
mul.f32 f745, f744, 0f3F5DB3D7;
mul.f32 f746, f741, 0f3F000000;
sub.f32 f747, f699, f746;
sub.f32 f748, f710, f722;
mul.f32 f749, f748, 0f3F5DB3D7;
add.f32 f750, f714, f726;
add.f32 f751, f715, f727;
mul.f32 f752, f750, 0f3F000000;
sub.f32 f753, f702, f752;
sub.f32 f754, f715, f727;
mul.f32 f755, f754, 0f3F5DB3D7;
mul.f32 f756, f751, 0f3F000000;
sub.f32 f757, f703, f756;
sub.f32 f758, f714, f726;
mul.f32 f759, f758, 0f3F5DB3D7;
add.f32 %1, f695, f731;
add.f32 %0, f694, f730;
add.f32 %3, f699, f741;
add.f32 %2, f698, f740;
add.f32 %5, f703, f751;
add.f32 %4, f702, f750;
sub.f32 %7, f737, f739;
add.f32 %6, f735, f733;
sub.f32 %9, f747, f749;
add.f32 %8, f745, f743;
sub.f32 %11, f757, f759;
add.f32 %10, f755, f753;
add.f32 %13, f739, f737;
sub.f32 %12, f733, f735;
add.f32 %15, f749, f747;
sub.f32 %14, f743, f745;
add.f32 %17, f759, f757;
sub.f32 %16, f753, f755;
})"
     // Operand numbering used by the PTX body above:
     //   %0..%17   outputs rmem[0..8], even index = .x, odd index = .y
     //   %18       smem
     //   %19..%21  lut_sp_9_2187, lut_sp_9_243, lut_sp_9_27
     //   %22..%44  inputs rmem[0..8]. The .y of rmem[1], [2], [4], [5] and [7]
     //             is listed twice, so the indices are not a plain 2*k layout
     //             (e.g. %26 is rmem[1].y and %27 is rmem[2].x). The body never
     //             references %25, %28, %33, %36 or %41.
     // Do not reorder or deduplicate this list without renumbering the body.
     : "=f"(rmem[0].x), "=f"(rmem[0].y), "=f"(rmem[1].x), "=f"(rmem[1].y), "=f"(rmem[2].x), "=f"(rmem[2].y), "=f"(rmem[3].x), "=f"(rmem[3].y), "=f"(rmem[4].x), "=f"(rmem[4].y), "=f"(rmem[5].x), "=f"(rmem[5].y), "=f"(rmem[6].x), "=f"(rmem[6].y), "=f"(rmem[7].x), "=f"(rmem[7].y), "=f"(rmem[8].x), "=f"(rmem[8].y): "r"(smem), "l"(lut_sp_9_2187), "l"(lut_sp_9_243), "l"(lut_sp_9_27), "f"(rmem[0].x), "f"(rmem[0].y), "f"(rmem[1].x), "f"(rmem[1].y), "f"(rmem[1].y), "f"(rmem[2].x), "f"(rmem[2].y), "f"(rmem[2].y), "f"(rmem[3].x), "f"(rmem[3].y), "f"(rmem[4].x), "f"(rmem[4].y), "f"(rmem[4].y), "f"(rmem[5].x), "f"(rmem[5].y), "f"(rmem[5].y), "f"(rmem[6].x), "f"(rmem[6].y), "f"(rmem[7].x), "f"(rmem[7].y), "f"(rmem[7].y), "f"(rmem[8].x), "f"(rmem[8].y));
};




template<> __forceinline__ __device__ void cufftdx_private_function<146, float, 1>(cufftdx::detail::complex<float> *rmem, unsigned smem){

asm volatile (R"({
.reg .f32 f<724>;
.reg .b32 r<24>;
.reg .b64 rd<17>;
mov.u32 r1, %tid.y;
mov.u32 r2, %18;
mad.lo.s32 r3, r1, 8748, r2;
add.f32 f37, %30, %38;
add.f32 f38, %22, f37;
add.f32 f39, %31, %39;
add.f32 f40, %23, f39;
mul.f32 f41, f37, 0f3F000000;
sub.f32 f42, %22, f41;
sub.f32 f43, %31, %39;
mul.f32 f44, f43, 0f3F5DB3D7;
add.f32 f45, f44, f42;
sub.f32 f46, f42, f44;
mul.f32 f47, f39, 0f3F000000;
sub.f32 f48, %23, f47;
sub.f32 f49, %30, %38;
mul.f32 f50, f49, 0f3F5DB3D7;
sub.f32 f51, f48, f50;
add.f32 f52, f50, f48;
add.f32 f53, %32, %40;
add.f32 f54, %24, f53;
add.f32 f55, %34, %42;
add.f32 f56, %26, f55;
mul.f32 f57, f53, 0f3F000000;
sub.f32 f58, %24, f57;
sub.f32 f59, %34, %42;
mul.f32 f60, f59, 0f3F5DB3D7;
add.f32 f61, f60, f58;
sub.f32 f62, f58, f60;
mul.f32 f63, f55, 0f3F000000;
sub.f32 f64, %26, f63;
sub.f32 f65, %32, %40;
mul.f32 f66, f65, 0f3F5DB3D7;
sub.f32 f67, f64, f66;
add.f32 f68, f66, f64;
add.f32 f69, %35, %43;
add.f32 f70, %27, f69;
add.f32 f71, %37, %44;
add.f32 f72, %29, f71;
mul.f32 f73, f69, 0f3F000000;
sub.f32 f74, %27, f73;
sub.f32 f75, %37, %44;
mul.f32 f76, f75, 0f3F5DB3D7;
add.f32 f77, f76, f74;
sub.f32 f78, f74, f76;
mul.f32 f79, f71, 0f3F000000;
sub.f32 f80, %29, f79;
sub.f32 f81, %35, %43;
mul.f32 f82, f81, 0f3F5DB3D7;
sub.f32 f83, f80, f82;
add.f32 f84, f82, f80;
mov.u32 r4, %tid.x;
mul.f32 f85, f61, 0f3F441B7D;
mul.f32 f86, f67, 0fBF248DBB;
sub.f32 f87, f85, f86;
mul.f32 f88, f67, 0f3F441B7D;
fma.rn.f32 f89, f61, 0fBF248DBB, f88;
mul.f32 f90, f77, 0f3E31D0D4;
mul.f32 f91, f83, 0fBF7C1C5C;
sub.f32 f92, f90, f91;
mul.f32 f93, f83, 0f3E31D0D4;
fma.rn.f32 f94, f77, 0fBF7C1C5C, f93;
mul.f32 f95, f62, 0f3E31D0D4;
mul.f32 f96, f68, 0fBF7C1C5C;
sub.f32 f97, f95, f96;
mul.f32 f98, f68, 0f3E31D0D4;
fma.rn.f32 f99, f62, 0fBF7C1C5C, f98;
mul.f32 f100, f78, 0fBF708FB2;
mul.f32 f101, f84, 0fBEAF1D44;
sub.f32 f102, f100, f101;
mul.f32 f103, f84, 0fBF708FB2;
fma.rn.f32 f104, f78, 0fBEAF1D44, f103;
add.f32 f105, f54, f70;
add.f32 f106, f38, f105;
add.f32 f107, f56, f72;
add.f32 f108, f40, f107;
mul.f32 f109, f105, 0f3F000000;
sub.f32 f110, f38, f109;
sub.f32 f111, f56, f72;
mul.f32 f112, f111, 0f3F5DB3D7;
add.f32 f113, f112, f110;
sub.f32 f114, f110, f112;
mul.f32 f115, f107, 0f3F000000;
sub.f32 f116, f40, f115;
sub.f32 f117, f54, f70;
mul.f32 f118, f117, 0f3F5DB3D7;
sub.f32 f119, f116, f118;
add.f32 f120, f118, f116;
add.f32 f121, f87, f92;
add.f32 f122, f45, f121;
add.f32 f123, f89, f94;
add.f32 f124, f51, f123;
mul.f32 f125, f121, 0f3F000000;
sub.f32 f126, f45, f125;
sub.f32 f127, f89, f94;
mul.f32 f128, f127, 0f3F5DB3D7;
add.f32 f129, f128, f126;
sub.f32 f130, f126, f128;
mul.f32 f131, f123, 0f3F000000;
sub.f32 f132, f51, f131;
sub.f32 f133, f87, f92;
mul.f32 f134, f133, 0f3F5DB3D7;
sub.f32 f135, f132, f134;
add.f32 f136, f134, f132;
add.f32 f137, f97, f102;
add.f32 f138, f46, f137;
add.f32 f139, f99, f104;
add.f32 f140, f52, f139;
mul.f32 f141, f137, 0f3F000000;
sub.f32 f142, f46, f141;
sub.f32 f143, f99, f104;
mul.f32 f144, f143, 0f3F5DB3D7;
add.f32 f145, f144, f142;
sub.f32 f146, f142, f144;
mul.f32 f147, f139, 0f3F000000;
sub.f32 f148, f52, f147;
sub.f32 f149, f97, f102;
mul.f32 f150, f149, 0f3F5DB3D7;
sub.f32 f151, f148, f150;
add.f32 f152, f150, f148;
mul.wide.u32 rd2, r4, -2032597691;
shr.u64 rd3, rd2, 39;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 243;
sub.s32 r7, r4, r6;
mul.wide.u32 rd4, r7, 8;
mov.u64 rd5, %19;
add.s64 rd6, rd5, rd4;
ld.global.v2.f32 {f153, f154}, [rd6];
mul.f32 f157, f153, f122;
mul.f32 f158, f154, f124;
sub.f32 f159, f157, f158;
mul.f32 f160, f153, f124;
fma.rn.f32 f161, f154, f122, f160;
mul.f32 f162, f153, f153;
mul.f32 f163, f154, f154;
sub.f32 f164, f162, f163;
mul.f32 f165, f154, f153;
fma.rn.f32 f166, f154, f153, f165;
mul.f32 f167, f164, f138;
mul.f32 f168, f166, f140;
sub.f32 f169, f167, f168;
mul.f32 f170, f164, f140;
fma.rn.f32 f171, f166, f138, f170;
mul.f32 f172, f153, f164;
mul.f32 f173, f154, f166;
sub.f32 f174, f172, f173;
mul.f32 f175, f153, f166;
fma.rn.f32 f176, f154, f164, f175;
mul.f32 f177, f174, f113;
mul.f32 f178, f176, f119;
sub.f32 f179, f177, f178;
mul.f32 f180, f174, f119;
fma.rn.f32 f181, f176, f113, f180;
mul.f32 f182, f153, f174;
mul.f32 f183, f154, f176;
sub.f32 f184, f182, f183;
mul.f32 f185, f153, f176;
fma.rn.f32 f186, f154, f174, f185;
mul.f32 f187, f184, f129;
mul.f32 f188, f186, f135;
sub.f32 f189, f187, f188;
mul.f32 f190, f184, f135;
fma.rn.f32 f191, f186, f129, f190;
mul.f32 f192, f153, f184;
mul.f32 f193, f154, f186;
sub.f32 f194, f192, f193;
mul.f32 f195, f153, f186;
fma.rn.f32 f196, f154, f184, f195;
mul.f32 f197, f194, f145;
mul.f32 f198, f196, f151;
sub.f32 f199, f197, f198;
mul.f32 f200, f194, f151;
fma.rn.f32 f201, f196, f145, f200;
mul.f32 f202, f153, f194;
mul.f32 f203, f154, f196;
sub.f32 f204, f202, f203;
mul.f32 f205, f153, f196;
fma.rn.f32 f206, f154, f194, f205;
mul.f32 f207, f204, f114;
mul.f32 f208, f206, f120;
sub.f32 f209, f207, f208;
mul.f32 f210, f204, f120;
fma.rn.f32 f211, f206, f114, f210;
mul.f32 f212, f153, f204;
mul.f32 f213, f154, f206;
sub.f32 f214, f212, f213;
mul.f32 f215, f153, f206;
fma.rn.f32 f216, f154, f204, f215;
mul.f32 f217, f214, f130;
mul.f32 f218, f216, f136;
sub.f32 f219, f217, f218;
mul.f32 f220, f214, f136;
fma.rn.f32 f221, f216, f130, f220;
mul.f32 f222, f153, f214;
mul.f32 f223, f154, f216;
sub.f32 f224, f222, f223;
mul.f32 f225, f153, f216;
fma.rn.f32 f226, f154, f214, f225;
mul.f32 f227, f224, f146;
mul.f32 f228, f226, f152;
sub.f32 f229, f227, f228;
mul.f32 f230, f224, f152;
fma.rn.f32 f231, f226, f146, f230;
mad.lo.s32 r8, r5, 8748, r3;
barrier.sync 0;
mad.lo.s32 r9, r7, 36, r8;
st.shared.f32 [r9], f106;
st.shared.f32 [r9+4], f159;
st.shared.f32 [r9+8], f169;
st.shared.f32 [r9+12], f179;
st.shared.f32 [r9+16], f189;
st.shared.f32 [r9+20], f199;
st.shared.f32 [r9+24], f209;
st.shared.f32 [r9+28], f219;
st.shared.f32 [r9+32], f229;
barrier.sync 0;
shl.b32 r10, r7, 5;
sub.s32 r11, r9, r10;
ld.shared.f32 f232, [r11];
ld.shared.f32 f233, [r11+972];
ld.shared.f32 f234, [r11+1944];
ld.shared.f32 f235, [r11+2916];
ld.shared.f32 f236, [r11+3888];
ld.shared.f32 f237, [r11+4860];
ld.shared.f32 f238, [r11+5832];
ld.shared.f32 f239, [r11+6804];
ld.shared.f32 f240, [r11+7776];
barrier.sync 0;
st.shared.f32 [r9], f108;
st.shared.f32 [r9+4], f161;
st.shared.f32 [r9+8], f171;
st.shared.f32 [r9+12], f181;
st.shared.f32 [r9+16], f191;
st.shared.f32 [r9+20], f201;
st.shared.f32 [r9+24], f211;
st.shared.f32 [r9+28], f221;
st.shared.f32 [r9+32], f231;
barrier.sync 0;
ld.shared.f32 f241, [r11];
ld.shared.f32 f242, [r11+972];
ld.shared.f32 f243, [r11+1944];
ld.shared.f32 f244, [r11+2916];
ld.shared.f32 f245, [r11+3888];
ld.shared.f32 f246, [r11+4860];
ld.shared.f32 f247, [r11+5832];
ld.shared.f32 f248, [r11+6804];
ld.shared.f32 f249, [r11+7776];
add.f32 f250, f235, f238;
add.f32 f251, f232, f250;
add.f32 f252, f244, f247;
add.f32 f253, f241, f252;
mul.f32 f254, f250, 0f3F000000;
sub.f32 f255, f232, f254;
sub.f32 f256, f244, f247;
mul.f32 f257, f256, 0f3F5DB3D7;
add.f32 f258, f257, f255;
sub.f32 f259, f255, f257;
mul.f32 f260, f252, 0f3F000000;
sub.f32 f261, f241, f260;
sub.f32 f262, f235, f238;
mul.f32 f263, f262, 0f3F5DB3D7;
sub.f32 f264, f261, f263;
add.f32 f265, f263, f261;
add.f32 f266, f236, f239;
add.f32 f267, f233, f266;
add.f32 f268, f245, f248;
add.f32 f269, f242, f268;
mul.f32 f270, f266, 0f3F000000;
sub.f32 f271, f233, f270;
sub.f32 f272, f245, f248;
mul.f32 f273, f272, 0f3F5DB3D7;
add.f32 f274, f273, f271;
sub.f32 f275, f271, f273;
mul.f32 f276, f268, 0f3F000000;
sub.f32 f277, f242, f276;
sub.f32 f278, f236, f239;
mul.f32 f279, f278, 0f3F5DB3D7;
sub.f32 f280, f277, f279;
add.f32 f281, f279, f277;
add.f32 f282, f237, f240;
add.f32 f283, f234, f282;
add.f32 f284, f246, f249;
add.f32 f285, f243, f284;
mul.f32 f286, f282, 0f3F000000;
sub.f32 f287, f234, f286;
sub.f32 f288, f246, f249;
mul.f32 f289, f288, 0f3F5DB3D7;
add.f32 f290, f289, f287;
sub.f32 f291, f287, f289;
mul.f32 f292, f284, 0f3F000000;
sub.f32 f293, f243, f292;
sub.f32 f294, f237, f240;
mul.f32 f295, f294, 0f3F5DB3D7;
sub.f32 f296, f293, f295;
add.f32 f297, f295, f293;
mul.f32 f298, f274, 0f3F441B7D;
mul.f32 f299, f280, 0fBF248DBB;
sub.f32 f300, f298, f299;
mul.f32 f301, f280, 0f3F441B7D;
fma.rn.f32 f302, f274, 0fBF248DBB, f301;
mul.f32 f303, f290, 0f3E31D0D4;
mul.f32 f304, f296, 0fBF7C1C5C;
sub.f32 f305, f303, f304;
mul.f32 f306, f296, 0f3E31D0D4;
fma.rn.f32 f307, f290, 0fBF7C1C5C, f306;
mul.f32 f308, f275, 0f3E31D0D4;
mul.f32 f309, f281, 0fBF7C1C5C;
sub.f32 f310, f308, f309;
mul.f32 f311, f281, 0f3E31D0D4;
fma.rn.f32 f312, f275, 0fBF7C1C5C, f311;
mul.f32 f313, f291, 0fBF708FB2;
mul.f32 f314, f297, 0fBEAF1D44;
sub.f32 f315, f313, f314;
mul.f32 f316, f297, 0fBF708FB2;
fma.rn.f32 f317, f291, 0fBEAF1D44, f316;
add.f32 f318, f267, f283;
add.f32 f319, f251, f318;
add.f32 f320, f269, f285;
add.f32 f321, f253, f320;
mul.f32 f322, f318, 0f3F000000;
sub.f32 f323, f251, f322;
sub.f32 f324, f269, f285;
mul.f32 f325, f324, 0f3F5DB3D7;
add.f32 f326, f325, f323;
sub.f32 f327, f323, f325;
mul.f32 f328, f320, 0f3F000000;
sub.f32 f329, f253, f328;
sub.f32 f330, f267, f283;
mul.f32 f331, f330, 0f3F5DB3D7;
sub.f32 f332, f329, f331;
add.f32 f333, f331, f329;
add.f32 f334, f300, f305;
add.f32 f335, f258, f334;
add.f32 f336, f302, f307;
add.f32 f337, f264, f336;
mul.f32 f338, f334, 0f3F000000;
sub.f32 f339, f258, f338;
sub.f32 f340, f302, f307;
mul.f32 f341, f340, 0f3F5DB3D7;
add.f32 f342, f341, f339;
sub.f32 f343, f339, f341;
mul.f32 f344, f336, 0f3F000000;
sub.f32 f345, f264, f344;
sub.f32 f346, f300, f305;
mul.f32 f347, f346, 0f3F5DB3D7;
sub.f32 f348, f345, f347;
add.f32 f349, f347, f345;
add.f32 f350, f310, f315;
add.f32 f351, f259, f350;
add.f32 f352, f312, f317;
add.f32 f353, f265, f352;
mul.f32 f354, f350, 0f3F000000;
sub.f32 f355, f259, f354;
sub.f32 f356, f312, f317;
mul.f32 f357, f356, 0f3F5DB3D7;
add.f32 f358, f357, f355;
sub.f32 f359, f355, f357;
mul.f32 f360, f352, 0f3F000000;
sub.f32 f361, f265, f360;
sub.f32 f362, f310, f315;
mul.f32 f363, f362, 0f3F5DB3D7;
sub.f32 f364, f361, f363;
add.f32 f365, f363, f361;
mul.wide.u32 rd7, r7, 954437177;
shr.u64 rd8, rd7, 33;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 9;
sub.s32 r14, r7, r13;
mul.wide.u32 rd9, r12, 8;
mov.u64 rd10, %20;
add.s64 rd11, rd10, rd9;
ld.global.v2.f32 {f366, f367}, [rd11];
mul.f32 f370, f366, f335;
mul.f32 f371, f367, f337;
sub.f32 f372, f370, f371;
mul.f32 f373, f366, f337;
fma.rn.f32 f374, f367, f335, f373;
mul.f32 f375, f366, f366;
mul.f32 f376, f367, f367;
sub.f32 f377, f375, f376;
mul.f32 f378, f367, f366;
fma.rn.f32 f379, f367, f366, f378;
mul.f32 f380, f377, f351;
mul.f32 f381, f379, f353;
sub.f32 f382, f380, f381;
mul.f32 f383, f377, f353;
fma.rn.f32 f384, f379, f351, f383;
mul.f32 f385, f366, f377;
mul.f32 f386, f367, f379;
sub.f32 f387, f385, f386;
mul.f32 f388, f366, f379;
fma.rn.f32 f389, f367, f377, f388;
mul.f32 f390, f387, f326;
mul.f32 f391, f389, f332;
sub.f32 f392, f390, f391;
mul.f32 f393, f387, f332;
fma.rn.f32 f394, f389, f326, f393;
mul.f32 f395, f366, f387;
mul.f32 f396, f367, f389;
sub.f32 f397, f395, f396;
mul.f32 f398, f366, f389;
fma.rn.f32 f399, f367, f387, f398;
mul.f32 f400, f397, f342;
mul.f32 f401, f399, f348;
sub.f32 f402, f400, f401;
mul.f32 f403, f397, f348;
fma.rn.f32 f404, f399, f342, f403;
mul.f32 f405, f366, f397;
mul.f32 f406, f367, f399;
sub.f32 f407, f405, f406;
mul.f32 f408, f366, f399;
fma.rn.f32 f409, f367, f397, f408;
mul.f32 f410, f407, f358;
mul.f32 f411, f409, f364;
sub.f32 f412, f410, f411;
mul.f32 f413, f407, f364;
fma.rn.f32 f414, f409, f358, f413;
mul.f32 f415, f366, f407;
mul.f32 f416, f367, f409;
sub.f32 f417, f415, f416;
mul.f32 f418, f366, f409;
fma.rn.f32 f419, f367, f407, f418;
mul.f32 f420, f417, f327;
mul.f32 f421, f419, f333;
sub.f32 f422, f420, f421;
mul.f32 f423, f417, f333;
fma.rn.f32 f424, f419, f327, f423;
mul.f32 f425, f366, f417;
mul.f32 f426, f367, f419;
sub.f32 f427, f425, f426;
mul.f32 f428, f366, f419;
fma.rn.f32 f429, f367, f417, f428;
mul.f32 f430, f427, f343;
mul.f32 f431, f429, f349;
sub.f32 f432, f430, f431;
mul.f32 f433, f427, f349;
fma.rn.f32 f434, f429, f343, f433;
mul.f32 f435, f366, f427;
mul.f32 f436, f367, f429;
sub.f32 f437, f435, f436;
mul.f32 f438, f366, f429;
fma.rn.f32 f439, f367, f427, f438;
mul.f32 f440, f437, f359;
mul.f32 f441, f439, f365;
sub.f32 f442, f440, f441;
mul.f32 f443, f437, f365;
fma.rn.f32 f444, f439, f359, f443;
shl.b32 r15, r14, 2;
add.s32 r16, r8, r15;
barrier.sync 0;
mad.lo.s32 r17, r12, 324, r16;
st.shared.f32 [r17], f319;
st.shared.f32 [r17+36], f372;
st.shared.f32 [r17+72], f382;
st.shared.f32 [r17+108], f392;
st.shared.f32 [r17+144], f402;
st.shared.f32 [r17+180], f412;
st.shared.f32 [r17+216], f422;
st.shared.f32 [r17+252], f432;
st.shared.f32 [r17+288], f442;
barrier.sync 0;
ld.shared.f32 f445, [r11];
ld.shared.f32 f446, [r11+972];
ld.shared.f32 f447, [r11+1944];
ld.shared.f32 f448, [r11+2916];
ld.shared.f32 f449, [r11+3888];
ld.shared.f32 f450, [r11+4860];
ld.shared.f32 f451, [r11+5832];
ld.shared.f32 f452, [r11+6804];
ld.shared.f32 f453, [r11+7776];
barrier.sync 0;
st.shared.f32 [r17], f321;
st.shared.f32 [r17+36], f374;
st.shared.f32 [r17+72], f384;
st.shared.f32 [r17+108], f394;
st.shared.f32 [r17+144], f404;
st.shared.f32 [r17+180], f414;
st.shared.f32 [r17+216], f424;
st.shared.f32 [r17+252], f434;
st.shared.f32 [r17+288], f444;
barrier.sync 0;
ld.shared.f32 f454, [r11];
ld.shared.f32 f455, [r11+972];
ld.shared.f32 f456, [r11+1944];
ld.shared.f32 f457, [r11+2916];
ld.shared.f32 f458, [r11+3888];
ld.shared.f32 f459, [r11+4860];
ld.shared.f32 f460, [r11+5832];
ld.shared.f32 f461, [r11+6804];
ld.shared.f32 f462, [r11+7776];
add.f32 f463, f448, f451;
add.f32 f464, f445, f463;
add.f32 f465, f457, f460;
add.f32 f466, f454, f465;
mul.f32 f467, f463, 0f3F000000;
sub.f32 f468, f445, f467;
sub.f32 f469, f457, f460;
mul.f32 f470, f469, 0f3F5DB3D7;
add.f32 f471, f470, f468;
sub.f32 f472, f468, f470;
mul.f32 f473, f465, 0f3F000000;
sub.f32 f474, f454, f473;
sub.f32 f475, f448, f451;
mul.f32 f476, f475, 0f3F5DB3D7;
sub.f32 f477, f474, f476;
add.f32 f478, f476, f474;
add.f32 f479, f449, f452;
add.f32 f480, f446, f479;
add.f32 f481, f458, f461;
add.f32 f482, f455, f481;
mul.f32 f483, f479, 0f3F000000;
sub.f32 f484, f446, f483;
sub.f32 f485, f458, f461;
mul.f32 f486, f485, 0f3F5DB3D7;
add.f32 f487, f486, f484;
sub.f32 f488, f484, f486;
mul.f32 f489, f481, 0f3F000000;
sub.f32 f490, f455, f489;
sub.f32 f491, f449, f452;
mul.f32 f492, f491, 0f3F5DB3D7;
sub.f32 f493, f490, f492;
add.f32 f494, f492, f490;
add.f32 f495, f450, f453;
add.f32 f496, f447, f495;
add.f32 f497, f459, f462;
add.f32 f498, f456, f497;
mul.f32 f499, f495, 0f3F000000;
sub.f32 f500, f447, f499;
sub.f32 f501, f459, f462;
mul.f32 f502, f501, 0f3F5DB3D7;
add.f32 f503, f502, f500;
sub.f32 f504, f500, f502;
mul.f32 f505, f497, 0f3F000000;
sub.f32 f506, f456, f505;
sub.f32 f507, f450, f453;
mul.f32 f508, f507, 0f3F5DB3D7;
sub.f32 f509, f506, f508;
add.f32 f510, f508, f506;
mul.f32 f511, f487, 0f3F441B7D;
mul.f32 f512, f493, 0fBF248DBB;
sub.f32 f513, f511, f512;
mul.f32 f514, f493, 0f3F441B7D;
fma.rn.f32 f515, f487, 0fBF248DBB, f514;
mul.f32 f516, f503, 0f3E31D0D4;
mul.f32 f517, f509, 0fBF7C1C5C;
sub.f32 f518, f516, f517;
mul.f32 f519, f509, 0f3E31D0D4;
fma.rn.f32 f520, f503, 0fBF7C1C5C, f519;
mul.f32 f521, f488, 0f3E31D0D4;
mul.f32 f522, f494, 0fBF7C1C5C;
sub.f32 f523, f521, f522;
mul.f32 f524, f494, 0f3E31D0D4;
fma.rn.f32 f525, f488, 0fBF7C1C5C, f524;
mul.f32 f526, f504, 0fBF708FB2;
mul.f32 f527, f510, 0fBEAF1D44;
sub.f32 f528, f526, f527;
mul.f32 f529, f510, 0fBF708FB2;
fma.rn.f32 f530, f504, 0fBEAF1D44, f529;
add.f32 f531, f480, f496;
add.f32 f532, f464, f531;
add.f32 f533, f482, f498;
add.f32 f534, f466, f533;
mul.f32 f535, f531, 0f3F000000;
sub.f32 f536, f464, f535;
sub.f32 f537, f482, f498;
mul.f32 f538, f537, 0f3F5DB3D7;
add.f32 f539, f538, f536;
sub.f32 f540, f536, f538;
mul.f32 f541, f533, 0f3F000000;
sub.f32 f542, f466, f541;
sub.f32 f543, f480, f496;
mul.f32 f544, f543, 0f3F5DB3D7;
sub.f32 f545, f542, f544;
add.f32 f546, f544, f542;
add.f32 f547, f513, f518;
add.f32 f548, f471, f547;
add.f32 f549, f515, f520;
add.f32 f550, f477, f549;
mul.f32 f551, f547, 0f3F000000;
sub.f32 f552, f471, f551;
sub.f32 f553, f515, f520;
mul.f32 f554, f553, 0f3F5DB3D7;
add.f32 f555, f554, f552;
sub.f32 f556, f552, f554;
mul.f32 f557, f549, 0f3F000000;
sub.f32 f558, f477, f557;
sub.f32 f559, f513, f518;
mul.f32 f560, f559, 0f3F5DB3D7;
sub.f32 f561, f558, f560;
add.f32 f562, f560, f558;
add.f32 f563, f523, f528;
add.f32 f564, f472, f563;
add.f32 f565, f525, f530;
add.f32 f566, f478, f565;
mul.f32 f567, f563, 0f3F000000;
sub.f32 f568, f472, f567;
sub.f32 f569, f525, f530;
mul.f32 f570, f569, 0f3F5DB3D7;
add.f32 f571, f570, f568;
sub.f32 f572, f568, f570;
mul.f32 f573, f565, 0f3F000000;
sub.f32 f574, f478, f573;
sub.f32 f575, f523, f528;
mul.f32 f576, f575, 0f3F5DB3D7;
sub.f32 f577, f574, f576;
add.f32 f578, f576, f574;
mul.wide.u32 rd12, r7, -901412889;
shr.u64 rd13, rd12, 38;
cvt.u32.u64 r18, rd13;
mul.lo.s32 r19, r18, 81;
sub.s32 r20, r7, r19;
mul.wide.u32 rd14, r18, 8;
mov.u64 rd15, %21;
add.s64 rd16, rd15, rd14;
ld.global.v2.f32 {f579, f580}, [rd16];
mul.f32 f583, f579, f548;
mul.f32 f584, f580, f550;
sub.f32 f585, f583, f584;
mul.f32 f586, f579, f550;
fma.rn.f32 f587, f580, f548, f586;
mul.f32 f588, f579, f579;
mul.f32 f589, f580, f580;
sub.f32 f590, f588, f589;
mul.f32 f591, f580, f579;
fma.rn.f32 f592, f580, f579, f591;
mul.f32 f593, f590, f564;
mul.f32 f594, f592, f566;
sub.f32 f595, f593, f594;
mul.f32 f596, f590, f566;
fma.rn.f32 f597, f592, f564, f596;
mul.f32 f598, f579, f590;
mul.f32 f599, f580, f592;
sub.f32 f600, f598, f599;
mul.f32 f601, f579, f592;
fma.rn.f32 f602, f580, f590, f601;
mul.f32 f603, f600, f539;
mul.f32 f604, f602, f545;
sub.f32 f605, f603, f604;
mul.f32 f606, f600, f545;
fma.rn.f32 f607, f602, f539, f606;
mul.f32 f608, f579, f600;
mul.f32 f609, f580, f602;
sub.f32 f610, f608, f609;
mul.f32 f611, f579, f602;
fma.rn.f32 f612, f580, f600, f611;
mul.f32 f613, f610, f555;
mul.f32 f614, f612, f561;
sub.f32 f615, f613, f614;
mul.f32 f616, f610, f561;
fma.rn.f32 f617, f612, f555, f616;
mul.f32 f618, f579, f610;
mul.f32 f619, f580, f612;
sub.f32 f620, f618, f619;
mul.f32 f621, f579, f612;
fma.rn.f32 f622, f580, f610, f621;
mul.f32 f623, f620, f571;
mul.f32 f624, f622, f577;
sub.f32 f625, f623, f624;
mul.f32 f626, f620, f577;
fma.rn.f32 f627, f622, f571, f626;
mul.f32 f628, f579, f620;
mul.f32 f629, f580, f622;
sub.f32 f630, f628, f629;
mul.f32 f631, f579, f622;
fma.rn.f32 f632, f580, f620, f631;
mul.f32 f633, f630, f540;
mul.f32 f634, f632, f546;
sub.f32 f635, f633, f634;
mul.f32 f636, f630, f546;
fma.rn.f32 f637, f632, f540, f636;
mul.f32 f638, f579, f630;
mul.f32 f639, f580, f632;
sub.f32 f640, f638, f639;
mul.f32 f641, f579, f632;
fma.rn.f32 f642, f580, f630, f641;
mul.f32 f643, f640, f556;
mul.f32 f644, f642, f562;
sub.f32 f645, f643, f644;
mul.f32 f646, f640, f562;
fma.rn.f32 f647, f642, f556, f646;
mul.f32 f648, f579, f640;
mul.f32 f649, f580, f642;
sub.f32 f650, f648, f649;
mul.f32 f651, f579, f642;
fma.rn.f32 f652, f580, f640, f651;
mul.f32 f653, f650, f572;
mul.f32 f654, f652, f578;
sub.f32 f655, f653, f654;
mul.f32 f656, f650, f578;
fma.rn.f32 f657, f652, f572, f656;
shl.b32 r21, r20, 2;
add.s32 r22, r8, r21;
barrier.sync 0;
mad.lo.s32 r23, r18, 2916, r22;
st.shared.f32 [r23], f532;
st.shared.f32 [r23+324], f585;
st.shared.f32 [r23+648], f595;
st.shared.f32 [r23+972], f605;
st.shared.f32 [r23+1296], f615;
st.shared.f32 [r23+1620], f625;
st.shared.f32 [r23+1944], f635;
st.shared.f32 [r23+2268], f645;
st.shared.f32 [r23+2592], f655;
barrier.sync 0;
ld.shared.f32 f658, [r11];
ld.shared.f32 f659, [r11+972];
ld.shared.f32 f660, [r11+1944];
ld.shared.f32 f661, [r11+2916];
ld.shared.f32 f662, [r11+3888];
ld.shared.f32 f663, [r11+4860];
ld.shared.f32 f664, [r11+5832];
ld.shared.f32 f665, [r11+6804];
ld.shared.f32 f666, [r11+7776];
barrier.sync 0;
st.shared.f32 [r23], f534;
st.shared.f32 [r23+324], f587;
st.shared.f32 [r23+648], f597;
st.shared.f32 [r23+972], f607;
st.shared.f32 [r23+1296], f617;
st.shared.f32 [r23+1620], f627;
st.shared.f32 [r23+1944], f637;
st.shared.f32 [r23+2268], f647;
st.shared.f32 [r23+2592], f657;
barrier.sync 0;
ld.shared.f32 f667, [r11];
ld.shared.f32 f668, [r11+972];
ld.shared.f32 f669, [r11+1944];
ld.shared.f32 f670, [r11+2916];
ld.shared.f32 f671, [r11+3888];
ld.shared.f32 f672, [r11+4860];
ld.shared.f32 f673, [r11+5832];
ld.shared.f32 f674, [r11+6804];
ld.shared.f32 f675, [r11+7776];
add.f32 f676, f661, f664;
add.f32 f677, f670, f673;
mul.f32 f678, f676, 0f3F000000;
sub.f32 f679, f658, f678;
sub.f32 f680, f670, f673;
mul.f32 f681, f680, 0f3F5DB3D7;
mul.f32 f682, f677, 0f3F000000;
sub.f32 f683, f667, f682;
sub.f32 f684, f661, f664;
mul.f32 f685, f684, 0f3F5DB3D7;
add.f32 f686, f662, f665;
add.f32 f687, f671, f674;
mul.f32 f688, f686, 0f3F000000;
sub.f32 f689, f659, f688;
sub.f32 f690, f671, f674;
mul.f32 f691, f690, 0f3F5DB3D7;
mul.f32 f692, f687, 0f3F000000;
sub.f32 f693, f668, f692;
sub.f32 f694, f662, f665;
mul.f32 f695, f694, 0f3F5DB3D7;
add.f32 f696, f663, f666;
add.f32 f697, f672, f675;
mul.f32 f698, f696, 0f3F000000;
sub.f32 f699, f660, f698;
sub.f32 f700, f672, f675;
mul.f32 f701, f700, 0f3F5DB3D7;
mul.f32 f702, f697, 0f3F000000;
sub.f32 f703, f669, f702;
sub.f32 f704, f663, f666;
mul.f32 f705, f704, 0f3F5DB3D7;
add.f32 %0, f658, f676;
add.f32 %1, f667, f677;
add.f32 %2, f659, f686;
add.f32 %3, f668, f687;
add.f32 %4, f660, f696;
add.f32 %5, f669, f697;
add.f32 %6, f681, f679;
sub.f32 %7, f683, f685;
add.f32 %8, f691, f689;
sub.f32 %9, f693, f695;
add.f32 %10, f701, f699;
sub.f32 %11, f703, f705;
sub.f32 %12, f679, f681;
add.f32 %13, f685, f683;
sub.f32 %14, f689, f691;
add.f32 %15, f695, f693;
sub.f32 %16, f699, f701;
add.f32 %17, f705, f703;
})"
     : "=f"(rmem[0].x), "=f"(rmem[0].y), "=f"(rmem[1].x), "=f"(rmem[1].y), "=f"(rmem[2].x), "=f"(rmem[2].y), "=f"(rmem[3].x), "=f"(rmem[3].y), "=f"(rmem[4].x), "=f"(rmem[4].y), "=f"(rmem[5].x), "=f"(rmem[5].y), "=f"(rmem[6].x), "=f"(rmem[6].y), "=f"(rmem[7].x), "=f"(rmem[7].y), "=f"(rmem[8].x), "=f"(rmem[8].y): "r"(smem), "l"(lut_sp_9_2187), "l"(lut_sp_9_243), "l"(lut_sp_9_27), "f"(rmem[0].x), "f"(rmem[0].y), "f"(rmem[1].x), "f"(rmem[1].y), "f"(rmem[1].y), "f"(rmem[2].x), "f"(rmem[2].y), "f"(rmem[2].y), "f"(rmem[3].x), "f"(rmem[3].y), "f"(rmem[4].x), "f"(rmem[4].y), "f"(rmem[4].y), "f"(rmem[5].x), "f"(rmem[5].y), "f"(rmem[5].y), "f"(rmem[6].x), "f"(rmem[6].y), "f"(rmem[7].x), "f"(rmem[7].y), "f"(rmem[7].y), "f"(rmem[8].x), "f"(rmem[8].y));
};




template<> __forceinline__ __device__ void cufftdx_private_function<147, float, 1>(cufftdx::detail::complex<float> *rmem, unsigned smem){

asm volatile (R"({
.reg .f32 f<2432>;
.reg .b32 r<24>;
.reg .b64 rd<16>;
mov.u32 r22, %tid.y;
mov.u32 r23, %54;
mad.lo.s32 r3, r22, 8748, r23;
add.f32 f109, %75, %93;
add.f32 f110, %57, f109;
mul.f32 f113, f109, 0f3F000000;
sub.f32 f114, %57, f113;
add.f32 f2423, %76, %94;
sub.f32 f115, %76, %94;
mul.f32 f116, f115, 0f3F5DB3D7;
add.f32 f117, f116, f114;
sub.f32 f118, f114, f116;
add.f32 f2422, %58, f2423;
mul.f32 f119, f2423, 0f3F000000;
sub.f32 f120, %58, f119;
sub.f32 f121, %75, %93;
mul.f32 f122, f121, 0f3F5DB3D7;
sub.f32 f123, f120, f122;
add.f32 f124, f122, f120;
add.f32 f125, %81, %99;
add.f32 f126, %63, f125;
mul.f32 f129, f125, 0f3F000000;
sub.f32 f130, %63, f129;
add.f32 f2421, %82, %100;
sub.f32 f131, %82, %100;
mul.f32 f132, f131, 0f3F5DB3D7;
add.f32 f133, f132, f130;
sub.f32 f134, f130, f132;
add.f32 f2420, %64, f2421;
mul.f32 f135, f2421, 0f3F000000;
sub.f32 f136, %64, f135;
sub.f32 f137, %81, %99;
mul.f32 f138, f137, 0f3F5DB3D7;
sub.f32 f139, f136, f138;
add.f32 f140, f138, f136;
add.f32 f141, %87, %105;
add.f32 f142, %69, f141;
mul.f32 f145, f141, 0f3F000000;
sub.f32 f146, %69, f145;
add.f32 f2419, %88, %106;
sub.f32 f147, %88, %106;
mul.f32 f148, f147, 0f3F5DB3D7;
add.f32 f149, f148, f146;
sub.f32 f150, f146, f148;
add.f32 f2418, %70, f2419;
mul.f32 f151, f2419, 0f3F000000;
sub.f32 f152, %70, f151;
sub.f32 f153, %87, %105;
mul.f32 f154, f153, 0f3F5DB3D7;
sub.f32 f155, f152, f154;
add.f32 f156, f154, f152;
mul.f32 f158, f139, 0fBF248DBB;
mul.f32 f2417, f133, 0f3F441B7D;
sub.f32 f159, f2417, f158;
mul.f32 f160, f139, 0f3F441B7D;
fma.rn.f32 f161, f133, 0fBF248DBB, f160;
mul.f32 f2415, f149, 0f3E31D0D4;
mul.f32 f2416, f155, 0fBF7C1C5C;
sub.f32 f164, f2415, f2416;
mul.f32 f165, f155, 0f3E31D0D4;
fma.rn.f32 f166, f149, 0fBF7C1C5C, f165;
mul.f32 f2413, f134, 0f3E31D0D4;
mul.f32 f2414, f140, 0fBF7C1C5C;
sub.f32 f169, f2413, f2414;
mul.f32 f170, f140, 0f3E31D0D4;
fma.rn.f32 f171, f134, 0fBF7C1C5C, f170;
mul.f32 f2411, f150, 0fBF708FB2;
mul.f32 f2412, f156, 0fBEAF1D44;
sub.f32 f174, f2411, f2412;
mul.f32 f175, f156, 0fBF708FB2;
fma.rn.f32 f176, f150, 0fBEAF1D44, f175;
add.f32 f177, f126, f142;
add.f32 f178, f110, f177;
mul.f32 f181, f177, 0f3F000000;
sub.f32 f182, f110, f181;
add.f32 f2410, f2420, f2418;
sub.f32 f183, f2420, f2418;
mul.f32 f184, f183, 0f3F5DB3D7;
add.f32 f185, f184, f182;
sub.f32 f186, f182, f184;
add.f32 f2409, f2422, f2410;
mul.f32 f187, f2410, 0f3F000000;
sub.f32 f188, f2422, f187;
sub.f32 f189, f126, f142;
mul.f32 f190, f189, 0f3F5DB3D7;
sub.f32 f191, f188, f190;
add.f32 f192, f190, f188;
add.f32 f193, f159, f164;
add.f32 f194, f117, f193;
mul.f32 f197, f193, 0f3F000000;
sub.f32 f198, f117, f197;
add.f32 f2408, f161, f166;
sub.f32 f199, f161, f166;
mul.f32 f200, f199, 0f3F5DB3D7;
add.f32 f201, f200, f198;
sub.f32 f202, f198, f200;
add.f32 f2407, f123, f2408;
mul.f32 f203, f2408, 0f3F000000;
sub.f32 f204, f123, f203;
sub.f32 f205, f159, f164;
mul.f32 f206, f205, 0f3F5DB3D7;
sub.f32 f207, f204, f206;
add.f32 f208, f206, f204;
add.f32 f209, f169, f174;
add.f32 f210, f118, f209;
mul.f32 f213, f209, 0f3F000000;
sub.f32 f214, f118, f213;
add.f32 f2406, f171, f176;
sub.f32 f215, f171, f176;
mul.f32 f216, f215, 0f3F5DB3D7;
add.f32 f217, f216, f214;
sub.f32 f218, f214, f216;
add.f32 f2405, f124, f2406;
mul.f32 f219, f2406, 0f3F000000;
sub.f32 f220, f124, f219;
sub.f32 f221, f169, f174;
mul.f32 f222, f221, 0f3F5DB3D7;
sub.f32 f223, f220, f222;
add.f32 f224, f222, f220;
add.f32 f225, %77, %95;
add.f32 f226, %59, f225;
mul.f32 f229, f225, 0f3F000000;
sub.f32 f230, %59, f229;
add.f32 f2402, %111, %112;
sub.f32 f231, %111, %112;
mul.f32 f232, f231, 0f3F5DB3D7;
add.f32 f233, f232, f230;
sub.f32 f234, f230, f232;
add.f32 f2400, %113, f2402;
mul.f32 f235, f2402, 0f3F000000;
sub.f32 f236, %113, f235;
sub.f32 f237, %77, %95;
mul.f32 f238, f237, 0f3F5DB3D7;
sub.f32 f239, f236, f238;
add.f32 f240, f238, f236;
add.f32 f241, %83, %101;
add.f32 f242, %65, f241;
mul.f32 f245, f241, 0f3F000000;
sub.f32 f246, %65, f245;
add.f32 f2397, %115, %114;
sub.f32 f247, %115, %114;
mul.f32 f248, f247, 0f3F5DB3D7;
add.f32 f249, f248, f246;
sub.f32 f250, f246, f248;
add.f32 f2395, %116, f2397;
mul.f32 f251, f2397, 0f3F000000;
sub.f32 f252, %116, f251;
sub.f32 f253, %83, %101;
mul.f32 f254, f253, 0f3F5DB3D7;
sub.f32 f255, f252, f254;
add.f32 f256, f254, f252;
add.f32 f257, %89, %107;
add.f32 f258, %71, f257;
mul.f32 f261, f257, 0f3F000000;
sub.f32 f262, %71, f261;
add.f32 f2392, %117, %118;
sub.f32 f263, %117, %118;
mul.f32 f264, f263, 0f3F5DB3D7;
add.f32 f265, f264, f262;
sub.f32 f266, f262, f264;
add.f32 f2390, %119, f2392;
mul.f32 f267, f2392, 0f3F000000;
sub.f32 f268, %119, f267;
sub.f32 f269, %89, %107;
mul.f32 f270, f269, 0f3F5DB3D7;
sub.f32 f271, f268, f270;
add.f32 f272, f270, f268;
mul.f32 f274, f255, 0fBF248DBB;
mul.f32 f2389, f249, 0f3F441B7D;
sub.f32 f275, f2389, f274;
mul.f32 f276, f255, 0f3F441B7D;
fma.rn.f32 f277, f249, 0fBF248DBB, f276;
mul.f32 f279, f271, 0fBF7C1C5C;
mul.f32 f2388, f265, 0f3E31D0D4;
sub.f32 f280, f2388, f279;
mul.f32 f281, f271, 0f3E31D0D4;
fma.rn.f32 f282, f265, 0fBF7C1C5C, f281;
mul.f32 f2386, f250, 0f3E31D0D4;
mul.f32 f2387, f256, 0fBF7C1C5C;
sub.f32 f285, f2386, f2387;
mul.f32 f286, f256, 0f3E31D0D4;
fma.rn.f32 f287, f250, 0fBF7C1C5C, f286;
mul.f32 f2384, f266, 0fBF708FB2;
mul.f32 f2385, f272, 0fBEAF1D44;
sub.f32 f290, f2384, f2385;
mul.f32 f291, f272, 0fBF708FB2;
fma.rn.f32 f292, f266, 0fBEAF1D44, f291;
add.f32 f293, f242, f258;
add.f32 f294, f226, f293;
mul.f32 f297, f293, 0f3F000000;
sub.f32 f298, f226, f297;
add.f32 f2383, f2395, f2390;
sub.f32 f299, f2395, f2390;
mul.f32 f300, f299, 0f3F5DB3D7;
add.f32 f301, f300, f298;
sub.f32 f302, f298, f300;
add.f32 f2382, f2400, f2383;
mul.f32 f303, f2383, 0f3F000000;
sub.f32 f304, f2400, f303;
sub.f32 f305, f242, f258;
mul.f32 f306, f305, 0f3F5DB3D7;
sub.f32 f307, f304, f306;
add.f32 f308, f306, f304;
add.f32 f309, f275, f280;
add.f32 f310, f233, f309;
mul.f32 f313, f309, 0f3F000000;
sub.f32 f314, f233, f313;
add.f32 f2381, f277, f282;
sub.f32 f315, f277, f282;
mul.f32 f316, f315, 0f3F5DB3D7;
add.f32 f317, f316, f314;
sub.f32 f318, f314, f316;
add.f32 f2380, f239, f2381;
mul.f32 f319, f2381, 0f3F000000;
sub.f32 f320, f239, f319;
sub.f32 f321, f275, f280;
mul.f32 f322, f321, 0f3F5DB3D7;
sub.f32 f323, f320, f322;
add.f32 f324, f322, f320;
add.f32 f325, f285, f290;
add.f32 f326, f234, f325;
mul.f32 f329, f325, 0f3F000000;
sub.f32 f330, f234, f329;
add.f32 f2379, f287, f292;
sub.f32 f331, f287, f292;
mul.f32 f332, f331, 0f3F5DB3D7;
add.f32 f333, f332, f330;
sub.f32 f334, f330, f332;
add.f32 f2378, f240, f2379;
mul.f32 f335, f2379, 0f3F000000;
sub.f32 f336, f240, f335;
sub.f32 f337, f285, f290;
mul.f32 f338, f337, 0f3F5DB3D7;
sub.f32 f339, f336, f338;
add.f32 f340, f338, f336;
add.f32 f341, %79, %97;
add.f32 f342, %61, f341;
mul.f32 f345, f341, 0f3F000000;
sub.f32 f346, %61, f345;
add.f32 f2375, %120, %121;
sub.f32 f347, %120, %121;
mul.f32 f348, f347, 0f3F5DB3D7;
add.f32 f349, f348, f346;
sub.f32 f350, f346, f348;
add.f32 f2373, %122, f2375;
mul.f32 f351, f2375, 0f3F000000;
sub.f32 f352, %122, f351;
sub.f32 f353, %79, %97;
mul.f32 f354, f353, 0f3F5DB3D7;
sub.f32 f355, f352, f354;
add.f32 f356, f354, f352;
add.f32 f357, %85, %103;
add.f32 f358, %67, f357;
mul.f32 f361, f357, 0f3F000000;
sub.f32 f362, %67, f361;
add.f32 f2370, %124, %123;
sub.f32 f363, %124, %123;
mul.f32 f364, f363, 0f3F5DB3D7;
add.f32 f365, f364, f362;
sub.f32 f366, f362, f364;
add.f32 f2368, %125, f2370;
mul.f32 f367, f2370, 0f3F000000;
sub.f32 f368, %125, f367;
sub.f32 f369, %85, %103;
mul.f32 f370, f369, 0f3F5DB3D7;
sub.f32 f371, f368, f370;
add.f32 f372, f370, f368;
add.f32 f373, %91, %109;
add.f32 f374, %73, f373;
mul.f32 f377, f373, 0f3F000000;
sub.f32 f378, %73, f377;
add.f32 f2366, %126, %110;
sub.f32 f379, %126, %110;
mul.f32 f380, f379, 0f3F5DB3D7;
add.f32 f381, f380, f378;
sub.f32 f382, f378, f380;
add.f32 f2364, %127, f2366;
mul.f32 f383, f2366, 0f3F000000;
sub.f32 f384, %127, f383;
sub.f32 f385, %91, %109;
mul.f32 f386, f385, 0f3F5DB3D7;
sub.f32 f387, f384, f386;
add.f32 f388, f386, f384;
mul.f32 f390, f371, 0fBF248DBB;
mul.f32 f2363, f365, 0f3F441B7D;
sub.f32 f391, f2363, f390;
mul.f32 f392, f371, 0f3F441B7D;
fma.rn.f32 f393, f365, 0fBF248DBB, f392;
mul.f32 f395, f387, 0fBF7C1C5C;
mul.f32 f2362, f381, 0f3E31D0D4;
sub.f32 f396, f2362, f395;
mul.f32 f397, f387, 0f3E31D0D4;
fma.rn.f32 f398, f381, 0fBF7C1C5C, f397;
mul.f32 f2360, f366, 0f3E31D0D4;
mul.f32 f2361, f372, 0fBF7C1C5C;
sub.f32 f401, f2360, f2361;
mul.f32 f402, f372, 0f3E31D0D4;
fma.rn.f32 f403, f366, 0fBF7C1C5C, f402;
mul.f32 f2358, f382, 0fBF708FB2;
mul.f32 f2359, f388, 0fBEAF1D44;
sub.f32 f406, f2358, f2359;
mul.f32 f407, f388, 0fBF708FB2;
fma.rn.f32 f408, f382, 0fBEAF1D44, f407;
add.f32 f409, f358, f374;
add.f32 f410, f342, f409;
mul.f32 f413, f409, 0f3F000000;
sub.f32 f414, f342, f413;
add.f32 f2357, f2368, f2364;
sub.f32 f415, f2368, f2364;
mul.f32 f416, f415, 0f3F5DB3D7;
add.f32 f417, f416, f414;
sub.f32 f418, f414, f416;
add.f32 f2356, f2373, f2357;
mul.f32 f419, f2357, 0f3F000000;
sub.f32 f420, f2373, f419;
sub.f32 f421, f358, f374;
mul.f32 f422, f421, 0f3F5DB3D7;
sub.f32 f423, f420, f422;
add.f32 f424, f422, f420;
add.f32 f425, f391, f396;
add.f32 f426, f349, f425;
mul.f32 f429, f425, 0f3F000000;
sub.f32 f430, f349, f429;
add.f32 f2355, f393, f398;
sub.f32 f431, f393, f398;
mul.f32 f432, f431, 0f3F5DB3D7;
add.f32 f433, f432, f430;
sub.f32 f434, f430, f432;
add.f32 f2354, f355, f2355;
mul.f32 f435, f2355, 0f3F000000;
sub.f32 f436, f355, f435;
sub.f32 f437, f391, f396;
mul.f32 f438, f437, 0f3F5DB3D7;
sub.f32 f439, f436, f438;
add.f32 f440, f438, f436;
add.f32 f441, f401, f406;
add.f32 f442, f350, f441;
mul.f32 f445, f441, 0f3F000000;
sub.f32 f446, f350, f445;
add.f32 f2353, f403, f408;
sub.f32 f447, f403, f408;
mul.f32 f448, f447, 0f3F5DB3D7;
add.f32 f449, f448, f446;
sub.f32 f450, f446, f448;
add.f32 f2352, f356, f2353;
mul.f32 f451, f2353, 0f3F000000;
sub.f32 f452, f356, f451;
sub.f32 f453, f401, f406;
mul.f32 f454, f453, 0f3F5DB3D7;
sub.f32 f455, f452, f454;
add.f32 f456, f454, f452;
mul.f32 f458, f2380, 0fBE6C2691;
mul.f32 f2351, f310, 0f3F791978;
sub.f32 f459, f2351, f458;
mul.f32 f460, f2380, 0f3F791978;
fma.rn.f32 f461, f310, 0fBE6C2691, f460;
mul.f32 f2349, f426, 0f3F64C51C;
mul.f32 f2350, f2354, 0fBEE5C902;
sub.f32 f464, f2349, f2350;
mul.f32 f465, f2354, 0f3F64C51C;
fma.rn.f32 f466, f426, 0fBEE5C902, f465;
mul.f32 f2347, f326, 0f3F64C51C;
mul.f32 f2348, f2378, 0fBEE5C902;
sub.f32 f469, f2347, f2348;
mul.f32 f470, f2378, 0f3F64C51C;
fma.rn.f32 f471, f326, 0fBEE5C902, f470;
mul.f32 f2345, f442, 0f3F18DF63;
mul.f32 f2346, f2352, 0fBF4D57F2;
sub.f32 f474, f2345, f2346;
mul.f32 f475, f2352, 0f3F18DF63;
fma.rn.f32 f476, f442, 0fBF4D57F2, f475;
mul.f32 f2343, f301, 0f3F441B7D;
mul.f32 f2344, f307, 0fBF248DBB;
sub.f32 f479, f2343, f2344;
mul.f32 f480, f307, 0f3F441B7D;
fma.rn.f32 f481, f301, 0fBF248DBB, f480;
mul.f32 f483, f423, 0fBF7C1C5C;
mul.f32 f2342, f417, 0f3E31D0D4;
sub.f32 f484, f2342, f483;
mul.f32 f485, f423, 0f3E31D0D4;
fma.rn.f32 f486, f417, 0fBF7C1C5C, f485;
mul.f32 f488, f323, 0fBF4D57F2;
mul.f32 f2341, f317, 0f3F18DF63;
sub.f32 f489, f2341, f488;
mul.f32 f490, f323, 0f3F18DF63;
fma.rn.f32 f491, f317, 0fBF4D57F2, f490;
mul.f32 f493, f439, 0fBF753ECD;
mul.f32 f2340, f433, 0fBE92D7E0;
sub.f32 f494, f2340, f493;
mul.f32 f495, f439, 0fBE92D7E0;
fma.rn.f32 f496, f433, 0fBF753ECD, f495;
mul.f32 f498, f339, 0fBF6B1036;
mul.f32 f2339, f333, 0f3ECACAF8;
sub.f32 f499, f2339, f498;
mul.f32 f500, f339, 0f3ECACAF8;
fma.rn.f32 f501, f333, 0fBF6B1036, f500;
mul.f32 f503, f455, 0fBF3A3529;
mul.f32 f2338, f449, 0fBF2FAD88;
sub.f32 f504, f2338, f503;
mul.f32 f505, f455, 0fBF2FAD88;
fma.rn.f32 f506, f449, 0fBF3A3529, f505;
mul.f32 f508, f308, 0fBF7C1C5C;
mul.f32 f2337, f302, 0f3E31D0D4;
sub.f32 f509, f2337, f508;
mul.f32 f510, f308, 0f3E31D0D4;
fma.rn.f32 f511, f302, 0fBF7C1C5C, f510;
mul.f32 f2335, f418, 0fBF708FB2;
mul.f32 f2336, f424, 0fBEAF1D44;
sub.f32 f514, f2335, f2336;
mul.f32 f515, f424, 0fBF708FB2;
fma.rn.f32 f516, f418, 0fBEAF1D44, f515;
mul.f32 f2333, f318, 0fBD6E2946;
mul.f32 f2334, f324, 0fBF7F9120;
sub.f32 f519, f2333, f2334;
mul.f32 f520, f324, 0fBD6E2946;
fma.rn.f32 f521, f318, 0fBF7F9120, f520;
mul.f32 f2331, f434, 0fBF7E44DE;
mul.f32 f2332, f440, 0f3DEDC21F;
sub.f32 f524, f2331, f2332;
mul.f32 f525, f440, 0fBF7E44DE;
fma.rn.f32 f526, f434, 0f3DEDC21F, f525;
mul.f32 f528, f340, 0fBF753ECD;
mul.f32 f2330, f334, 0fBE92D7E0;
sub.f32 f529, f2330, f528;
mul.f32 f530, f340, 0fBE92D7E0;
fma.rn.f32 f531, f334, 0fBF753ECD, f530;
mul.f32 f533, f456, 0f3F0CAC9F;
mul.f32 f2329, f450, 0fBF55E287;
sub.f32 f534, f2329, f533;
mul.f32 f535, f456, 0fBF55E287;
fma.rn.f32 f536, f450, 0f3F0CAC9F, f535;
add.f32 f537, f294, f410;
add.f32 f538, f178, f537;
mul.f32 f541, f537, 0f3F000000;
sub.f32 f542, f178, f541;
add.f32 f2328, f2382, f2356;
sub.f32 f543, f2382, f2356;
mul.f32 f544, f543, 0f3F5DB3D7;
add.f32 f545, f544, f542;
sub.f32 f546, f542, f544;
add.f32 f2327, f2409, f2328;
mul.f32 f547, f2328, 0f3F000000;
sub.f32 f548, f2409, f547;
sub.f32 f549, f294, f410;
mul.f32 f550, f549, 0f3F5DB3D7;
sub.f32 f551, f548, f550;
add.f32 f552, f550, f548;
add.f32 f553, f459, f464;
add.f32 f554, f194, f553;
mul.f32 f557, f553, 0f3F000000;
sub.f32 f558, f194, f557;
add.f32 f2326, f461, f466;
sub.f32 f559, f461, f466;
mul.f32 f560, f559, 0f3F5DB3D7;
add.f32 f561, f560, f558;
sub.f32 f562, f558, f560;
add.f32 f2325, f2407, f2326;
mul.f32 f563, f2326, 0f3F000000;
sub.f32 f564, f2407, f563;
sub.f32 f565, f459, f464;
mul.f32 f566, f565, 0f3F5DB3D7;
sub.f32 f567, f564, f566;
add.f32 f568, f566, f564;
add.f32 f569, f469, f474;
add.f32 f570, f210, f569;
mul.f32 f573, f569, 0f3F000000;
sub.f32 f574, f210, f573;
add.f32 f2324, f471, f476;
sub.f32 f575, f471, f476;
mul.f32 f576, f575, 0f3F5DB3D7;
add.f32 f577, f576, f574;
sub.f32 f578, f574, f576;
add.f32 f2323, f2405, f2324;
mul.f32 f579, f2324, 0f3F000000;
sub.f32 f580, f2405, f579;
sub.f32 f581, f469, f474;
mul.f32 f582, f581, 0f3F5DB3D7;
sub.f32 f583, f580, f582;
add.f32 f584, f582, f580;
add.f32 f585, f479, f484;
add.f32 f586, f185, f585;
mul.f32 f589, f585, 0f3F000000;
sub.f32 f590, f185, f589;
add.f32 f2322, f481, f486;
sub.f32 f591, f481, f486;
mul.f32 f592, f591, 0f3F5DB3D7;
add.f32 f593, f592, f590;
sub.f32 f594, f590, f592;
add.f32 f2321, f191, f2322;
mul.f32 f595, f2322, 0f3F000000;
sub.f32 f596, f191, f595;
sub.f32 f597, f479, f484;
mul.f32 f598, f597, 0f3F5DB3D7;
sub.f32 f599, f596, f598;
add.f32 f600, f598, f596;
add.f32 f601, f489, f494;
add.f32 f602, f201, f601;
mul.f32 f605, f601, 0f3F000000;
sub.f32 f606, f201, f605;
add.f32 f2320, f491, f496;
sub.f32 f607, f491, f496;
mul.f32 f608, f607, 0f3F5DB3D7;
add.f32 f609, f608, f606;
sub.f32 f610, f606, f608;
add.f32 f2319, f207, f2320;
mul.f32 f611, f2320, 0f3F000000;
sub.f32 f612, f207, f611;
sub.f32 f613, f489, f494;
mul.f32 f614, f613, 0f3F5DB3D7;
sub.f32 f615, f612, f614;
add.f32 f616, f614, f612;
add.f32 f617, f499, f504;
add.f32 f618, f217, f617;
mul.f32 f621, f617, 0f3F000000;
sub.f32 f622, f217, f621;
add.f32 f2318, f501, f506;
sub.f32 f623, f501, f506;
mul.f32 f624, f623, 0f3F5DB3D7;
add.f32 f625, f624, f622;
sub.f32 f626, f622, f624;
add.f32 f2317, f223, f2318;
mul.f32 f627, f2318, 0f3F000000;
sub.f32 f628, f223, f627;
sub.f32 f629, f499, f504;
mul.f32 f630, f629, 0f3F5DB3D7;
sub.f32 f631, f628, f630;
add.f32 f632, f630, f628;
add.f32 f633, f509, f514;
add.f32 f634, f186, f633;
mul.f32 f637, f633, 0f3F000000;
sub.f32 f638, f186, f637;
add.f32 f2316, f511, f516;
sub.f32 f639, f511, f516;
mul.f32 f640, f639, 0f3F5DB3D7;
add.f32 f641, f640, f638;
sub.f32 f642, f638, f640;
add.f32 f2315, f192, f2316;
mul.f32 f643, f2316, 0f3F000000;
sub.f32 f644, f192, f643;
sub.f32 f645, f509, f514;
mul.f32 f646, f645, 0f3F5DB3D7;
sub.f32 f647, f644, f646;
add.f32 f648, f646, f644;
add.f32 f649, f519, f524;
add.f32 f650, f202, f649;
mul.f32 f653, f649, 0f3F000000;
sub.f32 f654, f202, f653;
add.f32 f2314, f521, f526;
sub.f32 f655, f521, f526;
mul.f32 f656, f655, 0f3F5DB3D7;
add.f32 f657, f656, f654;
sub.f32 f658, f654, f656;
add.f32 f2313, f208, f2314;
mul.f32 f659, f2314, 0f3F000000;
sub.f32 f660, f208, f659;
sub.f32 f661, f519, f524;
mul.f32 f662, f661, 0f3F5DB3D7;
sub.f32 f663, f660, f662;
add.f32 f664, f662, f660;
add.f32 f665, f529, f534;
add.f32 f666, f218, f665;
mul.f32 f669, f665, 0f3F000000;
sub.f32 f670, f218, f669;
add.f32 f2312, f531, f536;
sub.f32 f671, f531, f536;
mul.f32 f672, f671, 0f3F5DB3D7;
add.f32 f673, f672, f670;
sub.f32 f674, f670, f672;
add.f32 f2311, f224, f2312;
mul.f32 f675, f2312, 0f3F000000;
sub.f32 f676, f224, f675;
sub.f32 f677, f529, f534;
mul.f32 f678, f677, 0f3F5DB3D7;
sub.f32 f679, f676, f678;
add.f32 f680, f678, f676;
mov.u32 r21, %tid.x;
mul.wide.u32 rd2, r21, -901412889;
shr.u64 rd3, rd2, 38;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 81;
sub.s32 r7, r21, r6;
mov.u64 rd5, %55;
mul.wide.u32 rd14, r7, 8;
add.s64 rd6, rd5, rd14;
ld.global.v2.f32 {f681, f682}, [rd6];
mul.f32 f686, f682, f2325;
mul.f32 f2310, f681, f554;
sub.f32 f687, f2310, f686;
mul.f32 f688, f681, f2325;
fma.rn.f32 f689, f682, f554, f688;
mul.f32 f691, f682, f682;
mul.f32 f2309, f681, f681;
sub.f32 f692, f2309, f691;
mul.f32 f693, f682, f681;
fma.rn.f32 f694, f682, f681, f693;
mul.f32 f696, f694, f2323;
mul.f32 f2308, f692, f570;
sub.f32 f697, f2308, f696;
mul.f32 f698, f692, f2323;
fma.rn.f32 f699, f694, f570, f698;
mul.f32 f701, f682, f694;
mul.f32 f2307, f681, f692;
sub.f32 f702, f2307, f701;
mul.f32 f703, f681, f694;
fma.rn.f32 f704, f682, f692, f703;
mul.f32 f706, f704, f2321;
mul.f32 f2306, f702, f586;
sub.f32 f707, f2306, f706;
mul.f32 f708, f702, f2321;
fma.rn.f32 f709, f704, f586, f708;
mul.f32 f2304, f681, f702;
mul.f32 f2305, f682, f704;
sub.f32 f712, f2304, f2305;
mul.f32 f713, f681, f704;
fma.rn.f32 f714, f682, f702, f713;
mul.f32 f2302, f712, f602;
mul.f32 f2303, f714, f2319;
sub.f32 f717, f2302, f2303;
mul.f32 f718, f712, f2319;
fma.rn.f32 f719, f714, f602, f718;
mul.f32 f2300, f681, f712;
mul.f32 f2301, f682, f714;
sub.f32 f722, f2300, f2301;
mul.f32 f723, f681, f714;
fma.rn.f32 f724, f682, f712, f723;
mul.f32 f2298, f722, f618;
mul.f32 f2299, f724, f2317;
sub.f32 f727, f2298, f2299;
mul.f32 f728, f722, f2317;
fma.rn.f32 f729, f724, f618, f728;
mul.f32 f731, f682, f724;
mul.f32 f2297, f681, f722;
sub.f32 f732, f2297, f731;
mul.f32 f733, f681, f724;
fma.rn.f32 f734, f682, f722, f733;
mul.f32 f736, f734, f2315;
mul.f32 f2296, f732, f634;
sub.f32 f737, f2296, f736;
mul.f32 f738, f732, f2315;
fma.rn.f32 f739, f734, f634, f738;
mul.f32 f741, f682, f734;
mul.f32 f2295, f681, f732;
sub.f32 f742, f2295, f741;
mul.f32 f743, f681, f734;
fma.rn.f32 f744, f682, f732, f743;
mul.f32 f746, f744, f2313;
mul.f32 f2294, f742, f650;
sub.f32 f747, f2294, f746;
mul.f32 f748, f742, f2313;
fma.rn.f32 f749, f744, f650, f748;
mul.f32 f751, f682, f744;
mul.f32 f2293, f681, f742;
sub.f32 f752, f2293, f751;
mul.f32 f753, f681, f744;
fma.rn.f32 f754, f682, f742, f753;
mul.f32 f756, f754, f2311;
mul.f32 f2292, f752, f666;
sub.f32 f757, f2292, f756;
mul.f32 f758, f752, f2311;
fma.rn.f32 f759, f754, f666, f758;
mul.f32 f2290, f681, f752;
mul.f32 f2291, f682, f754;
sub.f32 f762, f2290, f2291;
mul.f32 f763, f681, f754;
fma.rn.f32 f764, f682, f752, f763;
mul.f32 f2288, f762, f545;
mul.f32 f2289, f764, f551;
sub.f32 f767, f2288, f2289;
mul.f32 f768, f762, f551;
fma.rn.f32 f769, f764, f545, f768;
mul.f32 f2286, f681, f762;
mul.f32 f2287, f682, f764;
sub.f32 f772, f2286, f2287;
mul.f32 f773, f681, f764;
fma.rn.f32 f774, f682, f762, f773;
mul.f32 f776, f774, f567;
mul.f32 f2285, f772, f561;
sub.f32 f777, f2285, f776;
mul.f32 f778, f772, f567;
fma.rn.f32 f779, f774, f561, f778;
mul.f32 f781, f682, f774;
mul.f32 f2284, f681, f772;
sub.f32 f782, f2284, f781;
mul.f32 f783, f681, f774;
fma.rn.f32 f784, f682, f772, f783;
mul.f32 f786, f784, f583;
mul.f32 f2283, f782, f577;
sub.f32 f787, f2283, f786;
mul.f32 f788, f782, f583;
fma.rn.f32 f789, f784, f577, f788;
mul.f32 f791, f682, f784;
mul.f32 f2282, f681, f782;
sub.f32 f792, f2282, f791;
mul.f32 f793, f681, f784;
fma.rn.f32 f794, f682, f782, f793;
mul.f32 f796, f794, f599;
mul.f32 f2281, f792, f593;
sub.f32 f797, f2281, f796;
mul.f32 f798, f792, f599;
fma.rn.f32 f799, f794, f593, f798;
mul.f32 f801, f682, f794;
mul.f32 f2280, f681, f792;
sub.f32 f802, f2280, f801;
mul.f32 f803, f681, f794;
fma.rn.f32 f804, f682, f792, f803;
mul.f32 f2278, f802, f609;
mul.f32 f2279, f804, f615;
sub.f32 f807, f2278, f2279;
mul.f32 f808, f802, f615;
fma.rn.f32 f809, f804, f609, f808;
mul.f32 f2276, f681, f802;
mul.f32 f2277, f682, f804;
sub.f32 f812, f2276, f2277;
mul.f32 f813, f681, f804;
fma.rn.f32 f814, f682, f802, f813;
mul.f32 f2274, f812, f625;
mul.f32 f2275, f814, f631;
sub.f32 f817, f2274, f2275;
mul.f32 f818, f812, f631;
fma.rn.f32 f819, f814, f625, f818;
mul.f32 f2272, f681, f812;
mul.f32 f2273, f682, f814;
sub.f32 f822, f2272, f2273;
mul.f32 f823, f681, f814;
fma.rn.f32 f824, f682, f812, f823;
mul.f32 f826, f824, f647;
mul.f32 f2271, f822, f641;
sub.f32 f827, f2271, f826;
mul.f32 f828, f822, f647;
fma.rn.f32 f829, f824, f641, f828;
mul.f32 f831, f682, f824;
mul.f32 f2270, f681, f822;
sub.f32 f832, f2270, f831;
mul.f32 f833, f681, f824;
fma.rn.f32 f834, f682, f822, f833;
mul.f32 f836, f834, f663;
mul.f32 f2269, f832, f657;
sub.f32 f837, f2269, f836;
mul.f32 f838, f832, f663;
fma.rn.f32 f839, f834, f657, f838;
mul.f32 f841, f682, f834;
mul.f32 f2268, f681, f832;
sub.f32 f842, f2268, f841;
mul.f32 f843, f681, f834;
fma.rn.f32 f844, f682, f832, f843;
mul.f32 f846, f844, f679;
mul.f32 f2267, f842, f673;
sub.f32 f847, f2267, f846;
mul.f32 f848, f842, f679;
fma.rn.f32 f849, f844, f673, f848;
mul.f32 f2265, f681, f842;
mul.f32 f2266, f682, f844;
sub.f32 f852, f2265, f2266;
mul.f32 f853, f681, f844;
fma.rn.f32 f854, f682, f842, f853;
mul.f32 f2263, f852, f546;
mul.f32 f2264, f854, f552;
sub.f32 f857, f2263, f2264;
mul.f32 f858, f852, f552;
fma.rn.f32 f859, f854, f546, f858;
mul.f32 f2261, f681, f852;
mul.f32 f2262, f682, f854;
sub.f32 f862, f2261, f2262;
mul.f32 f863, f681, f854;
fma.rn.f32 f864, f682, f852, f863;
mul.f32 f2259, f862, f562;
mul.f32 f2260, f864, f568;
sub.f32 f867, f2259, f2260;
mul.f32 f868, f862, f568;
fma.rn.f32 f869, f864, f562, f868;
mul.f32 f871, f682, f864;
mul.f32 f2258, f681, f862;
sub.f32 f872, f2258, f871;
mul.f32 f873, f681, f864;
fma.rn.f32 f874, f682, f862, f873;
mul.f32 f876, f874, f584;
mul.f32 f2257, f872, f578;
sub.f32 f877, f2257, f876;
mul.f32 f878, f872, f584;
fma.rn.f32 f879, f874, f578, f878;
mul.f32 f881, f682, f874;
mul.f32 f2256, f681, f872;
sub.f32 f882, f2256, f881;
mul.f32 f883, f681, f874;
fma.rn.f32 f884, f682, f872, f883;
mul.f32 f886, f884, f600;
mul.f32 f2255, f882, f594;
sub.f32 f887, f2255, f886;
mul.f32 f888, f882, f600;
fma.rn.f32 f889, f884, f594, f888;
mul.f32 f891, f682, f884;
mul.f32 f2254, f681, f882;
sub.f32 f892, f2254, f891;
mul.f32 f893, f681, f884;
fma.rn.f32 f894, f682, f882, f893;
mul.f32 f2252, f892, f610;
mul.f32 f2253, f894, f616;
sub.f32 f897, f2252, f2253;
mul.f32 f898, f892, f616;
fma.rn.f32 f899, f894, f610, f898;
mul.f32 f2250, f681, f892;
mul.f32 f2251, f682, f894;
sub.f32 f902, f2250, f2251;
mul.f32 f903, f681, f894;
fma.rn.f32 f904, f682, f892, f903;
mul.f32 f2248, f902, f626;
mul.f32 f2249, f904, f632;
sub.f32 f907, f2248, f2249;
mul.f32 f908, f902, f632;
fma.rn.f32 f909, f904, f626, f908;
mul.f32 f2246, f681, f902;
mul.f32 f2247, f682, f904;
sub.f32 f912, f2246, f2247;
mul.f32 f913, f681, f904;
fma.rn.f32 f914, f682, f902, f913;
mul.f32 f916, f914, f648;
mul.f32 f2245, f912, f642;
sub.f32 f917, f2245, f916;
mul.f32 f918, f912, f648;
fma.rn.f32 f919, f914, f642, f918;
mul.f32 f921, f682, f914;
mul.f32 f2244, f681, f912;
sub.f32 f922, f2244, f921;
mul.f32 f923, f681, f914;
fma.rn.f32 f924, f682, f912, f923;
mul.f32 f926, f924, f664;
mul.f32 f2243, f922, f658;
sub.f32 f927, f2243, f926;
mul.f32 f928, f922, f664;
fma.rn.f32 f929, f924, f658, f928;
mul.f32 f931, f682, f924;
mul.f32 f2242, f681, f922;
sub.f32 f932, f2242, f931;
mul.f32 f933, f681, f924;
fma.rn.f32 f934, f682, f922, f933;
mul.f32 f936, f934, f680;
mul.f32 f2241, f932, f674;
sub.f32 f937, f2241, f936;
mul.f32 f938, f932, f680;
fma.rn.f32 f939, f934, f674, f938;
mad.lo.s32 r8, r5, 8748, r3;
barrier.sync 0;
mad.lo.s32 r9, r7, 108, r8;
st.shared.f32 [r9], f538;
st.shared.f32 [r9+4], f687;
st.shared.f32 [r9+8], f697;
st.shared.f32 [r9+12], f707;
st.shared.f32 [r9+16], f717;
st.shared.f32 [r9+20], f727;
st.shared.f32 [r9+24], f737;
st.shared.f32 [r9+28], f747;
st.shared.f32 [r9+32], f757;
st.shared.f32 [r9+36], f767;
st.shared.f32 [r9+40], f777;
st.shared.f32 [r9+44], f787;
st.shared.f32 [r9+48], f797;
st.shared.f32 [r9+52], f807;
st.shared.f32 [r9+56], f817;
st.shared.f32 [r9+60], f827;
st.shared.f32 [r9+64], f837;
st.shared.f32 [r9+68], f847;
st.shared.f32 [r9+72], f857;
st.shared.f32 [r9+76], f867;
st.shared.f32 [r9+80], f877;
st.shared.f32 [r9+84], f887;
st.shared.f32 [r9+88], f897;
st.shared.f32 [r9+92], f907;
st.shared.f32 [r9+96], f917;
st.shared.f32 [r9+100], f927;
st.shared.f32 [r9+104], f937;
barrier.sync 0;
mad.lo.s32 r10, r7, -104, r9;
ld.shared.f32 f940, [r10];
ld.shared.f32 f941, [r10+324];
ld.shared.f32 f942, [r10+648];
ld.shared.f32 f943, [r10+972];
ld.shared.f32 f944, [r10+1296];
ld.shared.f32 f945, [r10+1620];
ld.shared.f32 f946, [r10+1944];
ld.shared.f32 f947, [r10+2268];
ld.shared.f32 f948, [r10+2592];
ld.shared.f32 f949, [r10+2916];
ld.shared.f32 f950, [r10+3240];
ld.shared.f32 f951, [r10+3564];
ld.shared.f32 f952, [r10+3888];
ld.shared.f32 f953, [r10+4212];
ld.shared.f32 f954, [r10+4536];
ld.shared.f32 f955, [r10+4860];
ld.shared.f32 f956, [r10+5184];
ld.shared.f32 f957, [r10+5508];
ld.shared.f32 f958, [r10+5832];
ld.shared.f32 f959, [r10+6156];
ld.shared.f32 f960, [r10+6480];
ld.shared.f32 f961, [r10+6804];
ld.shared.f32 f962, [r10+7128];
ld.shared.f32 f963, [r10+7452];
ld.shared.f32 f964, [r10+7776];
ld.shared.f32 f965, [r10+8100];
ld.shared.f32 f966, [r10+8424];
barrier.sync 0;
st.shared.f32 [r9], f2327;
st.shared.f32 [r9+4], f689;
st.shared.f32 [r9+8], f699;
st.shared.f32 [r9+12], f709;
st.shared.f32 [r9+16], f719;
st.shared.f32 [r9+20], f729;
st.shared.f32 [r9+24], f739;
st.shared.f32 [r9+28], f749;
st.shared.f32 [r9+32], f759;
st.shared.f32 [r9+36], f769;
st.shared.f32 [r9+40], f779;
st.shared.f32 [r9+44], f789;
st.shared.f32 [r9+48], f799;
st.shared.f32 [r9+52], f809;
st.shared.f32 [r9+56], f819;
st.shared.f32 [r9+60], f829;
st.shared.f32 [r9+64], f839;
st.shared.f32 [r9+68], f849;
st.shared.f32 [r9+72], f859;
st.shared.f32 [r9+76], f869;
st.shared.f32 [r9+80], f879;
st.shared.f32 [r9+84], f889;
st.shared.f32 [r9+88], f899;
st.shared.f32 [r9+92], f909;
st.shared.f32 [r9+96], f919;
st.shared.f32 [r9+100], f929;
st.shared.f32 [r9+104], f939;
barrier.sync 0;
add.f32 f994, f949, f958;
add.f32 f995, f940, f994;
mul.f32 f998, f994, 0f3F000000;
sub.f32 f999, f940, f998;
ld.shared.f32 f2240, [r10+5832];
ld.shared.f32 f2239, [r10+2916];
add.f32 f2238, f2239, f2240;
sub.f32 f1000, f2239, f2240;
mul.f32 f1001, f1000, 0f3F5DB3D7;
add.f32 f1002, f1001, f999;
sub.f32 f1003, f999, f1001;
ld.shared.f32 f2237, [r10];
add.f32 f2236, f2237, f2238;
mul.f32 f1004, f2238, 0f3F000000;
sub.f32 f1005, f2237, f1004;
sub.f32 f1006, f949, f958;
mul.f32 f1007, f1006, 0f3F5DB3D7;
sub.f32 f1008, f1005, f1007;
add.f32 f1009, f1007, f1005;
add.f32 f1010, f952, f961;
add.f32 f1011, f943, f1010;
mul.f32 f1014, f1010, 0f3F000000;
ld.shared.f32 f2235, [r10+6804];
sub.f32 f1015, f943, f1014;
ld.shared.f32 f2234, [r10+3888];
add.f32 f2233, f2234, f2235;
sub.f32 f1016, f2234, f2235;
mul.f32 f1017, f1016, 0f3F5DB3D7;
ld.shared.f32 f2232, [r10+972];
add.f32 f1018, f1017, f1015;
sub.f32 f1019, f1015, f1017;
add.f32 f2231, f2232, f2233;
mul.f32 f1020, f2233, 0f3F000000;
sub.f32 f1021, f2232, f1020;
sub.f32 f1022, f952, f961;
mul.f32 f1023, f1022, 0f3F5DB3D7;
sub.f32 f1024, f1021, f1023;
add.f32 f1025, f1023, f1021;
add.f32 f1026, f955, f964;
add.f32 f1027, f946, f1026;
mul.f32 f1030, f1026, 0f3F000000;
ld.shared.f32 f2230, [r10+7776];
ld.shared.f32 f2229, [r10+4860];
sub.f32 f1031, f946, f1030;
add.f32 f2228, f2229, f2230;
sub.f32 f1032, f2229, f2230;
mul.f32 f1033, f1032, 0f3F5DB3D7;
ld.shared.f32 f2227, [r10+1944];
add.f32 f1034, f1033, f1031;
sub.f32 f1035, f1031, f1033;
add.f32 f2226, f2227, f2228;
mul.f32 f1036, f2228, 0f3F000000;
sub.f32 f1037, f2227, f1036;
sub.f32 f1038, f955, f964;
mul.f32 f1039, f1038, 0f3F5DB3D7;
sub.f32 f1040, f1037, f1039;
add.f32 f1041, f1039, f1037;
mul.f32 f2224, f1018, 0f3F441B7D;
mul.f32 f2225, f1024, 0fBF248DBB;
sub.f32 f1044, f2224, f2225;
mul.f32 f1045, f1024, 0f3F441B7D;
fma.rn.f32 f1046, f1018, 0fBF248DBB, f1045;
mul.f32 f1048, f1040, 0fBF7C1C5C;
mul.f32 f2223, f1034, 0f3E31D0D4;
sub.f32 f1049, f2223, f1048;
mul.f32 f1050, f1040, 0f3E31D0D4;
fma.rn.f32 f1051, f1034, 0fBF7C1C5C, f1050;
mul.f32 f1053, f1025, 0fBF7C1C5C;
mul.f32 f2222, f1019, 0f3E31D0D4;
sub.f32 f1054, f2222, f1053;
mul.f32 f1055, f1025, 0f3E31D0D4;
fma.rn.f32 f1056, f1019, 0fBF7C1C5C, f1055;
mul.f32 f1058, f1041, 0fBEAF1D44;
mul.f32 f2221, f1035, 0fBF708FB2;
sub.f32 f1059, f2221, f1058;
mul.f32 f1060, f1041, 0fBF708FB2;
fma.rn.f32 f1061, f1035, 0fBEAF1D44, f1060;
add.f32 f1062, f1011, f1027;
add.f32 f1063, f995, f1062;
mul.f32 f1066, f1062, 0f3F000000;
sub.f32 f1067, f995, f1066;
add.f32 f2220, f2231, f2226;
sub.f32 f1068, f2231, f2226;
mul.f32 f1069, f1068, 0f3F5DB3D7;
add.f32 f1070, f1069, f1067;
sub.f32 f1071, f1067, f1069;
add.f32 f2219, f2236, f2220;
mul.f32 f1072, f2220, 0f3F000000;
sub.f32 f1073, f2236, f1072;
sub.f32 f1074, f1011, f1027;
mul.f32 f1075, f1074, 0f3F5DB3D7;
sub.f32 f1076, f1073, f1075;
add.f32 f1077, f1075, f1073;
add.f32 f1078, f1044, f1049;
add.f32 f1079, f1002, f1078;
mul.f32 f1082, f1078, 0f3F000000;
sub.f32 f1083, f1002, f1082;
add.f32 f2218, f1046, f1051;
sub.f32 f1084, f1046, f1051;
mul.f32 f1085, f1084, 0f3F5DB3D7;
add.f32 f1086, f1085, f1083;
sub.f32 f1087, f1083, f1085;
add.f32 f2217, f1008, f2218;
mul.f32 f1088, f2218, 0f3F000000;
sub.f32 f1089, f1008, f1088;
sub.f32 f1090, f1044, f1049;
mul.f32 f1091, f1090, 0f3F5DB3D7;
sub.f32 f1092, f1089, f1091;
add.f32 f1093, f1091, f1089;
add.f32 f1094, f1054, f1059;
add.f32 f1095, f1003, f1094;
mul.f32 f1098, f1094, 0f3F000000;
sub.f32 f1099, f1003, f1098;
add.f32 f2216, f1056, f1061;
sub.f32 f1100, f1056, f1061;
mul.f32 f1101, f1100, 0f3F5DB3D7;
add.f32 f1102, f1101, f1099;
sub.f32 f1103, f1099, f1101;
add.f32 f2215, f1009, f2216;
mul.f32 f1104, f2216, 0f3F000000;
sub.f32 f1105, f1009, f1104;
sub.f32 f1106, f1054, f1059;
mul.f32 f1107, f1106, 0f3F5DB3D7;
sub.f32 f1108, f1105, f1107;
add.f32 f1109, f1107, f1105;
add.f32 f1110, f950, f959;
add.f32 f1111, f941, f1110;
mul.f32 f1114, f1110, 0f3F000000;
sub.f32 f1115, f941, f1114;
ld.shared.f32 f2214, [r10+6156];
ld.shared.f32 f2213, [r10+3240];
add.f32 f2212, f2213, f2214;
sub.f32 f1116, f2213, f2214;
mul.f32 f1117, f1116, 0f3F5DB3D7;
add.f32 f1118, f1117, f1115;
sub.f32 f1119, f1115, f1117;
ld.shared.f32 f2211, [r10+324];
add.f32 f2210, f2211, f2212;
mul.f32 f1120, f2212, 0f3F000000;
sub.f32 f1121, f2211, f1120;
sub.f32 f1122, f950, f959;
mul.f32 f1123, f1122, 0f3F5DB3D7;
sub.f32 f1124, f1121, f1123;
add.f32 f1125, f1123, f1121;
add.f32 f1126, f953, f962;
add.f32 f1127, f944, f1126;
mul.f32 f1130, f1126, 0f3F000000;
ld.shared.f32 f2209, [r10+7128];
sub.f32 f1131, f944, f1130;
ld.shared.f32 f2208, [r10+4212];
add.f32 f2207, f2208, f2209;
sub.f32 f1132, f2208, f2209;
mul.f32 f1133, f1132, 0f3F5DB3D7;
add.f32 f1134, f1133, f1131;
sub.f32 f1135, f1131, f1133;
ld.shared.f32 f2206, [r10+1296];
add.f32 f2205, f2206, f2207;
mul.f32 f1136, f2207, 0f3F000000;
sub.f32 f1137, f2206, f1136;
sub.f32 f1138, f953, f962;
mul.f32 f1139, f1138, 0f3F5DB3D7;
sub.f32 f1140, f1137, f1139;
add.f32 f1141, f1139, f1137;
add.f32 f1142, f956, f965;
add.f32 f1143, f947, f1142;
mul.f32 f1146, f1142, 0f3F000000;
ld.shared.f32 f2204, [r10+5184];
sub.f32 f1147, f947, f1146;
ld.shared.f32 f2203, [r10+8100];
add.f32 f2202, f2204, f2203;
sub.f32 f1148, f2204, f2203;
mul.f32 f1149, f1148, 0f3F5DB3D7;
ld.shared.f32 f2201, [r10+2268];
add.f32 f1150, f1149, f1147;
sub.f32 f1151, f1147, f1149;
add.f32 f2200, f2201, f2202;
mul.f32 f1152, f2202, 0f3F000000;
sub.f32 f1153, f2201, f1152;
sub.f32 f1154, f956, f965;
mul.f32 f1155, f1154, 0f3F5DB3D7;
sub.f32 f1156, f1153, f1155;
add.f32 f1157, f1155, f1153;
mul.f32 f2198, f1134, 0f3F441B7D;
mul.f32 f2199, f1140, 0fBF248DBB;
sub.f32 f1160, f2198, f2199;
mul.f32 f1161, f1140, 0f3F441B7D;
fma.rn.f32 f1162, f1134, 0fBF248DBB, f1161;
mul.f32 f2196, f1150, 0f3E31D0D4;
mul.f32 f2197, f1156, 0fBF7C1C5C;
sub.f32 f1165, f2196, f2197;
mul.f32 f1166, f1156, 0f3E31D0D4;
fma.rn.f32 f1167, f1150, 0fBF7C1C5C, f1166;
mul.f32 f1169, f1141, 0fBF7C1C5C;
mul.f32 f2195, f1135, 0f3E31D0D4;
sub.f32 f1170, f2195, f1169;
mul.f32 f1171, f1141, 0f3E31D0D4;
fma.rn.f32 f1172, f1135, 0fBF7C1C5C, f1171;
mul.f32 f1174, f1157, 0fBEAF1D44;
mul.f32 f2194, f1151, 0fBF708FB2;
sub.f32 f1175, f2194, f1174;
mul.f32 f1176, f1157, 0fBF708FB2;
fma.rn.f32 f1177, f1151, 0fBEAF1D44, f1176;
add.f32 f1178, f1127, f1143;
add.f32 f1179, f1111, f1178;
mul.f32 f1182, f1178, 0f3F000000;
sub.f32 f1183, f1111, f1182;
add.f32 f2193, f2205, f2200;
sub.f32 f1184, f2205, f2200;
mul.f32 f1185, f1184, 0f3F5DB3D7;
add.f32 f1186, f1185, f1183;
sub.f32 f1187, f1183, f1185;
add.f32 f2192, f2210, f2193;
mul.f32 f1188, f2193, 0f3F000000;
sub.f32 f1189, f2210, f1188;
sub.f32 f1190, f1127, f1143;
mul.f32 f1191, f1190, 0f3F5DB3D7;
sub.f32 f1192, f1189, f1191;
add.f32 f1193, f1191, f1189;
add.f32 f1194, f1160, f1165;
add.f32 f1195, f1118, f1194;
mul.f32 f1198, f1194, 0f3F000000;
sub.f32 f1199, f1118, f1198;
add.f32 f2191, f1162, f1167;
sub.f32 f1200, f1162, f1167;
mul.f32 f1201, f1200, 0f3F5DB3D7;
add.f32 f1202, f1201, f1199;
sub.f32 f1203, f1199, f1201;
add.f32 f2190, f1124, f2191;
mul.f32 f1204, f2191, 0f3F000000;
sub.f32 f1205, f1124, f1204;
sub.f32 f1206, f1160, f1165;
mul.f32 f1207, f1206, 0f3F5DB3D7;
sub.f32 f1208, f1205, f1207;
add.f32 f1209, f1207, f1205;
add.f32 f1210, f1170, f1175;
add.f32 f1211, f1119, f1210;
mul.f32 f1214, f1210, 0f3F000000;
sub.f32 f1215, f1119, f1214;
add.f32 f2189, f1172, f1177;
sub.f32 f1216, f1172, f1177;
mul.f32 f1217, f1216, 0f3F5DB3D7;
add.f32 f1218, f1217, f1215;
sub.f32 f1219, f1215, f1217;
add.f32 f2188, f1125, f2189;
mul.f32 f1220, f2189, 0f3F000000;
sub.f32 f1221, f1125, f1220;
sub.f32 f1222, f1170, f1175;
mul.f32 f1223, f1222, 0f3F5DB3D7;
sub.f32 f1224, f1221, f1223;
add.f32 f1225, f1223, f1221;
add.f32 f1226, f951, f960;
add.f32 f1227, f942, f1226;
mul.f32 f1230, f1226, 0f3F000000;
ld.shared.f32 f2187, [r10+3564];
sub.f32 f1231, f942, f1230;
ld.shared.f32 f2186, [r10+6480];
add.f32 f2185, f2187, f2186;
sub.f32 f1232, f2187, f2186;
mul.f32 f1233, f1232, 0f3F5DB3D7;
ld.shared.f32 f2184, [r10+648];
add.f32 f1234, f1233, f1231;
sub.f32 f1235, f1231, f1233;
add.f32 f2183, f2184, f2185;
mul.f32 f1236, f2185, 0f3F000000;
sub.f32 f1237, f2184, f1236;
sub.f32 f1238, f951, f960;
mul.f32 f1239, f1238, 0f3F5DB3D7;
sub.f32 f1240, f1237, f1239;
add.f32 f1241, f1239, f1237;
add.f32 f1242, f954, f963;
add.f32 f1243, f945, f1242;
mul.f32 f1246, f1242, 0f3F000000;
sub.f32 f1247, f945, f1246;
ld.shared.f32 f2182, [r10+4536];
ld.shared.f32 f2181, [r10+7452];
add.f32 f2180, f2182, f2181;
sub.f32 f1248, f2182, f2181;
mul.f32 f1249, f1248, 0f3F5DB3D7;
add.f32 f1250, f1249, f1247;
sub.f32 f1251, f1247, f1249;
ld.shared.f32 f2179, [r10+1620];
add.f32 f2178, f2179, f2180;
mul.f32 f1252, f2180, 0f3F000000;
sub.f32 f1253, f2179, f1252;
sub.f32 f1254, f954, f963;
mul.f32 f1255, f1254, 0f3F5DB3D7;
sub.f32 f1256, f1253, f1255;
add.f32 f1257, f1255, f1253;
add.f32 f1258, f957, f966;
add.f32 f1259, f948, f1258;
mul.f32 f1262, f1258, 0f3F000000;
sub.f32 f1263, f948, f1262;
ld.shared.f32 f2177, [r10+8424];
ld.shared.f32 f2176, [r10+5508];
add.f32 f2175, f2176, f2177;
sub.f32 f1264, f2176, f2177;
mul.f32 f1265, f1264, 0f3F5DB3D7;
ld.shared.f32 f2174, [r10+2592];
add.f32 f1266, f1265, f1263;
sub.f32 f1267, f1263, f1265;
add.f32 f2173, f2174, f2175;
mul.f32 f1268, f2175, 0f3F000000;
sub.f32 f1269, f2174, f1268;
sub.f32 f1270, f957, f966;
mul.f32 f1271, f1270, 0f3F5DB3D7;
sub.f32 f1272, f1269, f1271;
add.f32 f1273, f1271, f1269;
mul.f32 f2171, f1250, 0f3F441B7D;
mul.f32 f2172, f1256, 0fBF248DBB;
sub.f32 f1276, f2171, f2172;
mul.f32 f1277, f1256, 0f3F441B7D;
fma.rn.f32 f1278, f1250, 0fBF248DBB, f1277;
mul.f32 f2169, f1266, 0f3E31D0D4;
mul.f32 f2170, f1272, 0fBF7C1C5C;
sub.f32 f1281, f2169, f2170;
mul.f32 f1282, f1272, 0f3E31D0D4;
fma.rn.f32 f1283, f1266, 0fBF7C1C5C, f1282;
mul.f32 f1285, f1257, 0fBF7C1C5C;
mul.f32 f2168, f1251, 0f3E31D0D4;
sub.f32 f1286, f2168, f1285;
mul.f32 f1287, f1257, 0f3E31D0D4;
fma.rn.f32 f1288, f1251, 0fBF7C1C5C, f1287;
mul.f32 f1290, f1273, 0fBEAF1D44;
mul.f32 f2167, f1267, 0fBF708FB2;
sub.f32 f1291, f2167, f1290;
mul.f32 f1292, f1273, 0fBF708FB2;
fma.rn.f32 f1293, f1267, 0fBEAF1D44, f1292;
add.f32 f1294, f1243, f1259;
add.f32 f1295, f1227, f1294;
mul.f32 f1298, f1294, 0f3F000000;
sub.f32 f1299, f1227, f1298;
add.f32 f2166, f2178, f2173;
sub.f32 f1300, f2178, f2173;
mul.f32 f1301, f1300, 0f3F5DB3D7;
add.f32 f1302, f1301, f1299;
sub.f32 f1303, f1299, f1301;
add.f32 f2165, f2183, f2166;
mul.f32 f1304, f2166, 0f3F000000;
sub.f32 f1305, f2183, f1304;
sub.f32 f1306, f1243, f1259;
mul.f32 f1307, f1306, 0f3F5DB3D7;
sub.f32 f1308, f1305, f1307;
add.f32 f1309, f1307, f1305;
add.f32 f1310, f1276, f1281;
add.f32 f1311, f1234, f1310;
mul.f32 f1314, f1310, 0f3F000000;
sub.f32 f1315, f1234, f1314;
add.f32 f2164, f1278, f1283;
sub.f32 f1316, f1278, f1283;
mul.f32 f1317, f1316, 0f3F5DB3D7;
add.f32 f1318, f1317, f1315;
sub.f32 f1319, f1315, f1317;
add.f32 f2163, f1240, f2164;
mul.f32 f1320, f2164, 0f3F000000;
sub.f32 f1321, f1240, f1320;
sub.f32 f1322, f1276, f1281;
mul.f32 f1323, f1322, 0f3F5DB3D7;
sub.f32 f1324, f1321, f1323;
add.f32 f1325, f1323, f1321;
add.f32 f1326, f1286, f1291;
add.f32 f1327, f1235, f1326;
mul.f32 f1330, f1326, 0f3F000000;
sub.f32 f1331, f1235, f1330;
add.f32 f2162, f1288, f1293;
sub.f32 f1332, f1288, f1293;
mul.f32 f1333, f1332, 0f3F5DB3D7;
add.f32 f1334, f1333, f1331;
sub.f32 f1335, f1331, f1333;
add.f32 f2161, f1241, f2162;
mul.f32 f1336, f2162, 0f3F000000;
sub.f32 f1337, f1241, f1336;
sub.f32 f1338, f1286, f1291;
mul.f32 f1339, f1338, 0f3F5DB3D7;
sub.f32 f1340, f1337, f1339;
add.f32 f1341, f1339, f1337;
mul.f32 f1343, f2190, 0fBE6C2691;
mul.f32 f2160, f1195, 0f3F791978;
sub.f32 f1344, f2160, f1343;
mul.f32 f1345, f2190, 0f3F791978;
fma.rn.f32 f1346, f1195, 0fBE6C2691, f1345;
mul.f32 f2158, f1311, 0f3F64C51C;
mul.f32 f2159, f2163, 0fBEE5C902;
sub.f32 f1349, f2158, f2159;
mul.f32 f1350, f2163, 0f3F64C51C;
fma.rn.f32 f1351, f1311, 0fBEE5C902, f1350;
mul.f32 f2156, f1211, 0f3F64C51C;
mul.f32 f2157, f2188, 0fBEE5C902;
sub.f32 f1354, f2156, f2157;
mul.f32 f1355, f2188, 0f3F64C51C;
fma.rn.f32 f1356, f1211, 0fBEE5C902, f1355;
mul.f32 f2154, f1327, 0f3F18DF63;
mul.f32 f2155, f2161, 0fBF4D57F2;
sub.f32 f1359, f2154, f2155;
mul.f32 f1360, f2161, 0f3F18DF63;
fma.rn.f32 f1361, f1327, 0fBF4D57F2, f1360;
mul.f32 f2152, f1186, 0f3F441B7D;
mul.f32 f2153, f1192, 0fBF248DBB;
sub.f32 f1364, f2152, f2153;
mul.f32 f1365, f1192, 0f3F441B7D;
fma.rn.f32 f1366, f1186, 0fBF248DBB, f1365;
mul.f32 f1368, f1308, 0fBF7C1C5C;
mul.f32 f2151, f1302, 0f3E31D0D4;
sub.f32 f1369, f2151, f1368;
mul.f32 f1370, f1308, 0f3E31D0D4;
fma.rn.f32 f1371, f1302, 0fBF7C1C5C, f1370;
mul.f32 f1373, f1208, 0fBF4D57F2;
mul.f32 f2150, f1202, 0f3F18DF63;
sub.f32 f1374, f2150, f1373;
mul.f32 f1375, f1208, 0f3F18DF63;
fma.rn.f32 f1376, f1202, 0fBF4D57F2, f1375;
mul.f32 f1378, f1324, 0fBF753ECD;
mul.f32 f2149, f1318, 0fBE92D7E0;
sub.f32 f1379, f2149, f1378;
mul.f32 f1380, f1324, 0fBE92D7E0;
fma.rn.f32 f1381, f1318, 0fBF753ECD, f1380;
mul.f32 f1383, f1224, 0fBF6B1036;
mul.f32 f2148, f1218, 0f3ECACAF8;
sub.f32 f1384, f2148, f1383;
mul.f32 f1385, f1224, 0f3ECACAF8;
fma.rn.f32 f1386, f1218, 0fBF6B1036, f1385;
mul.f32 f1388, f1340, 0fBF3A3529;
mul.f32 f2147, f1334, 0fBF2FAD88;
sub.f32 f1389, f2147, f1388;
mul.f32 f1390, f1340, 0fBF2FAD88;
fma.rn.f32 f1391, f1334, 0fBF3A3529, f1390;
mul.f32 f1393, f1193, 0fBF7C1C5C;
mul.f32 f2146, f1187, 0f3E31D0D4;
sub.f32 f1394, f2146, f1393;
mul.f32 f1395, f1193, 0f3E31D0D4;
fma.rn.f32 f1396, f1187, 0fBF7C1C5C, f1395;
mul.f32 f2144, f1303, 0fBF708FB2;
mul.f32 f2145, f1309, 0fBEAF1D44;
sub.f32 f1399, f2144, f2145;
mul.f32 f1400, f1309, 0fBF708FB2;
fma.rn.f32 f1401, f1303, 0fBEAF1D44, f1400;
mul.f32 f2142, f1203, 0fBD6E2946;
mul.f32 f2143, f1209, 0fBF7F9120;
sub.f32 f1404, f2142, f2143;
mul.f32 f1405, f1209, 0fBD6E2946;
fma.rn.f32 f1406, f1203, 0fBF7F9120, f1405;
mul.f32 f2140, f1319, 0fBF7E44DE;
mul.f32 f2141, f1325, 0f3DEDC21F;
sub.f32 f1409, f2140, f2141;
mul.f32 f1410, f1325, 0fBF7E44DE;
fma.rn.f32 f1411, f1319, 0f3DEDC21F, f1410;
mul.f32 f1413, f1225, 0fBF753ECD;
mul.f32 f2139, f1219, 0fBE92D7E0;
sub.f32 f1414, f2139, f1413;
mul.f32 f1415, f1225, 0fBE92D7E0;
fma.rn.f32 f1416, f1219, 0fBF753ECD, f1415;
mul.f32 f1418, f1341, 0f3F0CAC9F;
mul.f32 f2138, f1335, 0fBF55E287;
sub.f32 f1419, f2138, f1418;
mul.f32 f1420, f1341, 0fBF55E287;
fma.rn.f32 f1421, f1335, 0f3F0CAC9F, f1420;
add.f32 f1422, f1179, f1295;
add.f32 f1423, f1063, f1422;
mul.f32 f1426, f1422, 0f3F000000;
sub.f32 f1427, f1063, f1426;
add.f32 f2137, f2192, f2165;
sub.f32 f1428, f2192, f2165;
mul.f32 f1429, f1428, 0f3F5DB3D7;
add.f32 f1430, f1429, f1427;
sub.f32 f1431, f1427, f1429;
add.f32 f2136, f2219, f2137;
mul.f32 f1432, f2137, 0f3F000000;
sub.f32 f1433, f2219, f1432;
sub.f32 f1434, f1179, f1295;
mul.f32 f1435, f1434, 0f3F5DB3D7;
sub.f32 f1436, f1433, f1435;
add.f32 f1437, f1435, f1433;
add.f32 f1438, f1344, f1349;
add.f32 f1439, f1079, f1438;
mul.f32 f1442, f1438, 0f3F000000;
sub.f32 f1443, f1079, f1442;
add.f32 f2135, f1346, f1351;
sub.f32 f1444, f1346, f1351;
mul.f32 f1445, f1444, 0f3F5DB3D7;
add.f32 f1446, f1445, f1443;
sub.f32 f1447, f1443, f1445;
add.f32 f2134, f2217, f2135;
mul.f32 f1448, f2135, 0f3F000000;
sub.f32 f1449, f2217, f1448;
sub.f32 f1450, f1344, f1349;
mul.f32 f1451, f1450, 0f3F5DB3D7;
sub.f32 f1452, f1449, f1451;
add.f32 f1453, f1451, f1449;
add.f32 f1454, f1354, f1359;
add.f32 f1455, f1095, f1454;
mul.f32 f1458, f1454, 0f3F000000;
sub.f32 f1459, f1095, f1458;
add.f32 f2133, f1356, f1361;
sub.f32 f1460, f1356, f1361;
mul.f32 f1461, f1460, 0f3F5DB3D7;
add.f32 f1462, f1461, f1459;
sub.f32 f1463, f1459, f1461;
add.f32 f2132, f2215, f2133;
mul.f32 f1464, f2133, 0f3F000000;
sub.f32 f1465, f2215, f1464;
sub.f32 f1466, f1354, f1359;
mul.f32 f1467, f1466, 0f3F5DB3D7;
sub.f32 f1468, f1465, f1467;
add.f32 f1469, f1467, f1465;
add.f32 f1470, f1364, f1369;
add.f32 f1471, f1070, f1470;
mul.f32 f1474, f1470, 0f3F000000;
sub.f32 f1475, f1070, f1474;
add.f32 f2131, f1366, f1371;
sub.f32 f1476, f1366, f1371;
mul.f32 f1477, f1476, 0f3F5DB3D7;
add.f32 f1478, f1477, f1475;
sub.f32 f1479, f1475, f1477;
add.f32 f2130, f1076, f2131;
mul.f32 f1480, f2131, 0f3F000000;
sub.f32 f1481, f1076, f1480;
sub.f32 f1482, f1364, f1369;
mul.f32 f1483, f1482, 0f3F5DB3D7;
sub.f32 f1484, f1481, f1483;
add.f32 f1485, f1483, f1481;
add.f32 f1486, f1374, f1379;
add.f32 f1487, f1086, f1486;
mul.f32 f1490, f1486, 0f3F000000;
sub.f32 f1491, f1086, f1490;
add.f32 f2129, f1376, f1381;
sub.f32 f1492, f1376, f1381;
mul.f32 f1493, f1492, 0f3F5DB3D7;
add.f32 f1494, f1493, f1491;
sub.f32 f1495, f1491, f1493;
add.f32 f2128, f1092, f2129;
mul.f32 f1496, f2129, 0f3F000000;
sub.f32 f1497, f1092, f1496;
sub.f32 f1498, f1374, f1379;
mul.f32 f1499, f1498, 0f3F5DB3D7;
sub.f32 f1500, f1497, f1499;
add.f32 f1501, f1499, f1497;
add.f32 f1502, f1384, f1389;
add.f32 f1503, f1102, f1502;
mul.f32 f1506, f1502, 0f3F000000;
sub.f32 f1507, f1102, f1506;
add.f32 f2127, f1386, f1391;
sub.f32 f1508, f1386, f1391;
mul.f32 f1509, f1508, 0f3F5DB3D7;
add.f32 f1510, f1509, f1507;
sub.f32 f1511, f1507, f1509;
add.f32 f2126, f1108, f2127;
mul.f32 f1512, f2127, 0f3F000000;
sub.f32 f1513, f1108, f1512;
sub.f32 f1514, f1384, f1389;
mul.f32 f1515, f1514, 0f3F5DB3D7;
sub.f32 f1516, f1513, f1515;
add.f32 f1517, f1515, f1513;
add.f32 f1518, f1394, f1399;
add.f32 f1519, f1071, f1518;
mul.f32 f1522, f1518, 0f3F000000;
sub.f32 f1523, f1071, f1522;
add.f32 f2125, f1396, f1401;
sub.f32 f1524, f1396, f1401;
mul.f32 f1525, f1524, 0f3F5DB3D7;
add.f32 f1526, f1525, f1523;
sub.f32 f1527, f1523, f1525;
add.f32 f2124, f1077, f2125;
mul.f32 f1528, f2125, 0f3F000000;
sub.f32 f1529, f1077, f1528;
sub.f32 f1530, f1394, f1399;
mul.f32 f1531, f1530, 0f3F5DB3D7;
sub.f32 f1532, f1529, f1531;
add.f32 f1533, f1531, f1529;
add.f32 f1534, f1404, f1409;
add.f32 f1535, f1087, f1534;
mul.f32 f1538, f1534, 0f3F000000;
sub.f32 f1539, f1087, f1538;
add.f32 f2123, f1406, f1411;
sub.f32 f1540, f1406, f1411;
mul.f32 f1541, f1540, 0f3F5DB3D7;
add.f32 f1542, f1541, f1539;
sub.f32 f1543, f1539, f1541;
add.f32 f2122, f1093, f2123;
mul.f32 f1544, f2123, 0f3F000000;
sub.f32 f1545, f1093, f1544;
sub.f32 f1546, f1404, f1409;
mul.f32 f1547, f1546, 0f3F5DB3D7;
sub.f32 f1548, f1545, f1547;
add.f32 f1549, f1547, f1545;
add.f32 f1550, f1414, f1419;
add.f32 f1551, f1103, f1550;
mul.f32 f1554, f1550, 0f3F000000;
sub.f32 f1555, f1103, f1554;
add.f32 f2121, f1416, f1421;
sub.f32 f1556, f1416, f1421;
mul.f32 f1557, f1556, 0f3F5DB3D7;
add.f32 f1558, f1557, f1555;
sub.f32 f1559, f1555, f1557;
add.f32 f2120, f1109, f2121;
mul.f32 f1560, f2121, 0f3F000000;
sub.f32 f1561, f1109, f1560;
sub.f32 f1562, f1414, f1419;
mul.f32 f1563, f1562, 0f3F5DB3D7;
sub.f32 f1564, f1561, f1563;
add.f32 f1565, f1563, f1561;
mul.wide.u32 rd7, r7, 795364315;
shr.u64 rd8, rd7, 32;
cvt.u32.u64 r11, rd8;
sub.s32 r12, r7, r11;
shr.u32 r13, r12, 1;
add.s32 r14, r13, r11;
shr.u32 r15, r14, 4;
mul.lo.s32 r16, r15, 27;
sub.s32 r17, r7, r16;
mul.wide.u32 rd12, r15, 8;
mov.u64 rd13, %56;
add.s64 rd11, rd13, rd12;
ld.global.v2.f32 {f1566, f1567}, [rd11];
mul.f32 f2118, f1566, f1439;
mul.f32 f2119, f1567, f2134;
sub.f32 f1572, f2118, f2119;
mul.f32 f1573, f1566, f2134;
fma.rn.f32 f1574, f1567, f1439, f1573;
mul.f32 f2116, f1566, f1566;
mul.f32 f2117, f1567, f1567;
sub.f32 f1577, f2116, f2117;
mul.f32 f1578, f1567, f1566;
fma.rn.f32 f1579, f1567, f1566, f1578;
mul.f32 f2114, f1577, f1455;
mul.f32 f2115, f1579, f2132;
sub.f32 f1582, f2114, f2115;
mul.f32 f1583, f1577, f2132;
fma.rn.f32 f1584, f1579, f1455, f1583;
mul.f32 f1586, f1567, f1579;
mul.f32 f2113, f1566, f1577;
sub.f32 f1587, f2113, f1586;
mul.f32 f1588, f1566, f1579;
fma.rn.f32 f1589, f1567, f1577, f1588;
mul.f32 f1591, f1589, f2130;
mul.f32 f2112, f1587, f1471;
sub.f32 f1592, f2112, f1591;
mul.f32 f1593, f1587, f2130;
fma.rn.f32 f1594, f1589, f1471, f1593;
mul.f32 f1596, f1567, f1589;
mul.f32 f2111, f1566, f1587;
sub.f32 f1597, f2111, f1596;
mul.f32 f1598, f1566, f1589;
fma.rn.f32 f1599, f1567, f1587, f1598;
mul.f32 f1601, f1599, f2128;
mul.f32 f2110, f1597, f1487;
sub.f32 f1602, f2110, f1601;
mul.f32 f1603, f1597, f2128;
fma.rn.f32 f1604, f1599, f1487, f1603;
mul.f32 f1606, f1567, f1599;
mul.f32 f2109, f1566, f1597;
sub.f32 f1607, f2109, f1606;
mul.f32 f1608, f1566, f1599;
fma.rn.f32 f1609, f1567, f1597, f1608;
mul.f32 f2107, f1607, f1503;
mul.f32 f2108, f1609, f2126;
sub.f32 f1612, f2107, f2108;
mul.f32 f1613, f1607, f2126;
fma.rn.f32 f1614, f1609, f1503, f1613;
mul.f32 f2105, f1566, f1607;
mul.f32 f2106, f1567, f1609;
sub.f32 f1617, f2105, f2106;
mul.f32 f1618, f1566, f1609;
fma.rn.f32 f1619, f1567, f1607, f1618;
mul.f32 f2103, f1617, f1519;
mul.f32 f2104, f1619, f2124;
sub.f32 f1622, f2103, f2104;
mul.f32 f1623, f1617, f2124;
fma.rn.f32 f1624, f1619, f1519, f1623;
mul.f32 f2101, f1566, f1617;
mul.f32 f2102, f1567, f1619;
sub.f32 f1627, f2101, f2102;
mul.f32 f1628, f1566, f1619;
fma.rn.f32 f1629, f1567, f1617, f1628;
mul.f32 f1631, f1629, f2122;
mul.f32 f2100, f1627, f1535;
sub.f32 f1632, f2100, f1631;
mul.f32 f1633, f1627, f2122;
fma.rn.f32 f1634, f1629, f1535, f1633;
mul.f32 f1636, f1567, f1629;
mul.f32 f2099, f1566, f1627;
sub.f32 f1637, f2099, f1636;
mul.f32 f1638, f1566, f1629;
fma.rn.f32 f1639, f1567, f1627, f1638;
mul.f32 f1641, f1639, f2120;
mul.f32 f2098, f1637, f1551;
sub.f32 f1642, f2098, f1641;
mul.f32 f1643, f1637, f2120;
fma.rn.f32 f1644, f1639, f1551, f1643;
mul.f32 f1646, f1567, f1639;
mul.f32 f2097, f1566, f1637;
sub.f32 f1647, f2097, f1646;
mul.f32 f1648, f1566, f1639;
fma.rn.f32 f1649, f1567, f1637, f1648;
mul.f32 f1651, f1649, f1436;
mul.f32 f2096, f1647, f1430;
sub.f32 f1652, f2096, f1651;
mul.f32 f1653, f1647, f1436;
fma.rn.f32 f1654, f1649, f1430, f1653;
mul.f32 f2094, f1566, f1647;
mul.f32 f2095, f1567, f1649;
sub.f32 f1657, f2094, f2095;
mul.f32 f1658, f1566, f1649;
fma.rn.f32 f1659, f1567, f1647, f1658;
mul.f32 f2092, f1657, f1446;
mul.f32 f2093, f1659, f1452;
sub.f32 f1662, f2092, f2093;
mul.f32 f1663, f1657, f1452;
fma.rn.f32 f1664, f1659, f1446, f1663;
mul.f32 f2090, f1566, f1657;
mul.f32 f2091, f1567, f1659;
sub.f32 f1667, f2090, f2091;
mul.f32 f1668, f1566, f1659;
fma.rn.f32 f1669, f1567, f1657, f1668;
mul.f32 f2088, f1667, f1462;
mul.f32 f2089, f1669, f1468;
sub.f32 f1672, f2088, f2089;
mul.f32 f1673, f1667, f1468;
fma.rn.f32 f1674, f1669, f1462, f1673;
mul.f32 f1676, f1567, f1669;
mul.f32 f2087, f1566, f1667;
sub.f32 f1677, f2087, f1676;
mul.f32 f1678, f1566, f1669;
fma.rn.f32 f1679, f1567, f1667, f1678;
mul.f32 f1681, f1679, f1484;
mul.f32 f2086, f1677, f1478;
sub.f32 f1682, f2086, f1681;
mul.f32 f1683, f1677, f1484;
fma.rn.f32 f1684, f1679, f1478, f1683;
mul.f32 f1686, f1567, f1679;
mul.f32 f2085, f1566, f1677;
sub.f32 f1687, f2085, f1686;
mul.f32 f1688, f1566, f1679;
fma.rn.f32 f1689, f1567, f1677, f1688;
mul.f32 f1691, f1689, f1500;
mul.f32 f2084, f1687, f1494;
sub.f32 f1692, f2084, f1691;
mul.f32 f1693, f1687, f1500;
fma.rn.f32 f1694, f1689, f1494, f1693;
mul.f32 f1696, f1567, f1689;
mul.f32 f2083, f1566, f1687;
sub.f32 f1697, f2083, f1696;
mul.f32 f1698, f1566, f1689;
fma.rn.f32 f1699, f1567, f1687, f1698;
mul.f32 f1701, f1699, f1516;
mul.f32 f2082, f1697, f1510;
sub.f32 f1702, f2082, f1701;
mul.f32 f1703, f1697, f1516;
fma.rn.f32 f1704, f1699, f1510, f1703;
mul.f32 f2080, f1566, f1697;
mul.f32 f2081, f1567, f1699;
sub.f32 f1707, f2080, f2081;
mul.f32 f1708, f1566, f1699;
fma.rn.f32 f1709, f1567, f1697, f1708;
mul.f32 f2078, f1707, f1526;
mul.f32 f2079, f1709, f1532;
sub.f32 f1712, f2078, f2079;
mul.f32 f1713, f1707, f1532;
fma.rn.f32 f1714, f1709, f1526, f1713;
mul.f32 f2076, f1566, f1707;
mul.f32 f2077, f1567, f1709;
sub.f32 f1717, f2076, f2077;
mul.f32 f1718, f1566, f1709;
fma.rn.f32 f1719, f1567, f1707, f1718;
mul.f32 f1721, f1719, f1548;
mul.f32 f2075, f1717, f1542;
sub.f32 f1722, f2075, f1721;
mul.f32 f1723, f1717, f1548;
fma.rn.f32 f1724, f1719, f1542, f1723;
mul.f32 f1726, f1567, f1719;
mul.f32 f2074, f1566, f1717;
sub.f32 f1727, f2074, f1726;
mul.f32 f1728, f1566, f1719;
fma.rn.f32 f1729, f1567, f1717, f1728;
mul.f32 f1731, f1729, f1564;
mul.f32 f2073, f1727, f1558;
sub.f32 f1732, f2073, f1731;
mul.f32 f1733, f1727, f1564;
fma.rn.f32 f1734, f1729, f1558, f1733;
mul.f32 f1736, f1567, f1729;
mul.f32 f2072, f1566, f1727;
sub.f32 f1737, f2072, f1736;
mul.f32 f1738, f1566, f1729;
fma.rn.f32 f1739, f1567, f1727, f1738;
mul.f32 f1741, f1739, f1437;
mul.f32 f2071, f1737, f1431;
sub.f32 f1742, f2071, f1741;
mul.f32 f1743, f1737, f1437;
fma.rn.f32 f1744, f1739, f1431, f1743;
mul.f32 f1746, f1567, f1739;
mul.f32 f2070, f1566, f1737;
sub.f32 f1747, f2070, f1746;
mul.f32 f1748, f1566, f1739;
fma.rn.f32 f1749, f1567, f1737, f1748;
mul.f32 f2068, f1747, f1447;
mul.f32 f2069, f1749, f1453;
sub.f32 f1752, f2068, f2069;
mul.f32 f1753, f1747, f1453;
fma.rn.f32 f1754, f1749, f1447, f1753;
mul.f32 f2066, f1566, f1747;
mul.f32 f2067, f1567, f1749;
sub.f32 f1757, f2066, f2067;
mul.f32 f1758, f1566, f1749;
fma.rn.f32 f1759, f1567, f1747, f1758;
mul.f32 f2064, f1757, f1463;
mul.f32 f2065, f1759, f1469;
sub.f32 f1762, f2064, f2065;
mul.f32 f1763, f1757, f1469;
fma.rn.f32 f1764, f1759, f1463, f1763;
mul.f32 f2062, f1566, f1757;
mul.f32 f2063, f1567, f1759;
sub.f32 f1767, f2062, f2063;
mul.f32 f1768, f1566, f1759;
fma.rn.f32 f1769, f1567, f1757, f1768;
mul.f32 f1771, f1769, f1485;
mul.f32 f2061, f1767, f1479;
sub.f32 f1772, f2061, f1771;
mul.f32 f1773, f1767, f1485;
fma.rn.f32 f1774, f1769, f1479, f1773;
mul.f32 f1776, f1567, f1769;
mul.f32 f2060, f1566, f1767;
sub.f32 f1777, f2060, f1776;
mul.f32 f1778, f1566, f1769;
fma.rn.f32 f1779, f1567, f1767, f1778;
mul.f32 f1781, f1779, f1501;
mul.f32 f2059, f1777, f1495;
sub.f32 f1782, f2059, f1781;
mul.f32 f1783, f1777, f1501;
fma.rn.f32 f1784, f1779, f1495, f1783;
mul.f32 f1786, f1567, f1779;
mul.f32 f2058, f1566, f1777;
sub.f32 f1787, f2058, f1786;
mul.f32 f1788, f1566, f1779;
fma.rn.f32 f1789, f1567, f1777, f1788;
mul.f32 f1791, f1789, f1517;
mul.f32 f2057, f1787, f1511;
sub.f32 f1792, f2057, f1791;
mul.f32 f1793, f1787, f1517;
fma.rn.f32 f1794, f1789, f1511, f1793;
mul.f32 f2055, f1566, f1787;
mul.f32 f2056, f1567, f1789;
sub.f32 f1797, f2055, f2056;
mul.f32 f1798, f1566, f1789;
fma.rn.f32 f1799, f1567, f1787, f1798;
mul.f32 f2053, f1797, f1527;
mul.f32 f2054, f1799, f1533;
sub.f32 f1802, f2053, f2054;
mul.f32 f1803, f1797, f1533;
fma.rn.f32 f1804, f1799, f1527, f1803;
mul.f32 f2051, f1566, f1797;
mul.f32 f2052, f1567, f1799;
sub.f32 f1807, f2051, f2052;
mul.f32 f1808, f1566, f1799;
fma.rn.f32 f1809, f1567, f1797, f1808;
mul.f32 f2049, f1807, f1543;
mul.f32 f2050, f1809, f1549;
sub.f32 f1812, f2049, f2050;
mul.f32 f1813, f1807, f1549;
fma.rn.f32 f1814, f1809, f1543, f1813;
mul.f32 f1816, f1567, f1809;
mul.f32 f2048, f1566, f1807;
sub.f32 f1817, f2048, f1816;
mul.f32 f1818, f1566, f1809;
fma.rn.f32 f1819, f1567, f1807, f1818;
mul.f32 f1821, f1819, f1565;
mul.f32 f2047, f1817, f1559;
sub.f32 f1822, f2047, f1821;
mul.f32 f1823, f1817, f1565;
fma.rn.f32 f1824, f1819, f1559, f1823;
shl.b32 r18, r17, 2;
add.s32 r19, r8, r18;
barrier.sync 0;
mad.lo.s32 r20, r15, 2916, r19;
st.shared.f32 [r20], f1423;
st.shared.f32 [r20+108], f1572;
st.shared.f32 [r20+216], f1582;
st.shared.f32 [r20+324], f1592;
st.shared.f32 [r20+432], f1602;
st.shared.f32 [r20+540], f1612;
st.shared.f32 [r20+648], f1622;
st.shared.f32 [r20+756], f1632;
st.shared.f32 [r20+864], f1642;
st.shared.f32 [r20+972], f1652;
st.shared.f32 [r20+1080], f1662;
st.shared.f32 [r20+1188], f1672;
st.shared.f32 [r20+1296], f1682;
st.shared.f32 [r20+1404], f1692;
st.shared.f32 [r20+1512], f1702;
st.shared.f32 [r20+1620], f1712;
st.shared.f32 [r20+1728], f1722;
st.shared.f32 [r20+1836], f1732;
st.shared.f32 [r20+1944], f1742;
st.shared.f32 [r20+2052], f1752;
st.shared.f32 [r20+2160], f1762;
st.shared.f32 [r20+2268], f1772;
st.shared.f32 [r20+2376], f1782;
st.shared.f32 [r20+2484], f1792;
st.shared.f32 [r20+2592], f1802;
st.shared.f32 [r20+2700], f1812;
st.shared.f32 [r20+2808], f1822;
barrier.sync 0;
ld.shared.f32 f1825, [r10];
ld.shared.f32 f1826, [r10+324];
ld.shared.f32 f1827, [r10+648];
ld.shared.f32 f1828, [r10+972];
ld.shared.f32 f1829, [r10+1296];
ld.shared.f32 f1830, [r10+1620];
ld.shared.f32 f1831, [r10+1944];
ld.shared.f32 f1832, [r10+2268];
ld.shared.f32 f1833, [r10+2592];
ld.shared.f32 f1834, [r10+2916];
ld.shared.f32 f1835, [r10+3240];
ld.shared.f32 f1836, [r10+3564];
ld.shared.f32 f1837, [r10+3888];
ld.shared.f32 f1838, [r10+4212];
ld.shared.f32 f1839, [r10+4536];
ld.shared.f32 f1840, [r10+4860];
ld.shared.f32 f1841, [r10+5184];
ld.shared.f32 f1842, [r10+5508];
ld.shared.f32 f1843, [r10+5832];
ld.shared.f32 f1844, [r10+6156];
ld.shared.f32 f1845, [r10+6480];
ld.shared.f32 f1846, [r10+6804];
ld.shared.f32 f1847, [r10+7128];
ld.shared.f32 f1848, [r10+7452];
ld.shared.f32 f1849, [r10+7776];
ld.shared.f32 f1850, [r10+8100];
ld.shared.f32 f1851, [r10+8424];
barrier.sync 0;
st.shared.f32 [r20], f2136;
st.shared.f32 [r20+108], f1574;
st.shared.f32 [r20+216], f1584;
st.shared.f32 [r20+324], f1594;
st.shared.f32 [r20+432], f1604;
st.shared.f32 [r20+540], f1614;
st.shared.f32 [r20+648], f1624;
st.shared.f32 [r20+756], f1634;
st.shared.f32 [r20+864], f1644;
st.shared.f32 [r20+972], f1654;
st.shared.f32 [r20+1080], f1664;
st.shared.f32 [r20+1188], f1674;
st.shared.f32 [r20+1296], f1684;
st.shared.f32 [r20+1404], f1694;
st.shared.f32 [r20+1512], f1704;
st.shared.f32 [r20+1620], f1714;
st.shared.f32 [r20+1728], f1724;
st.shared.f32 [r20+1836], f1734;
st.shared.f32 [r20+1944], f1744;
st.shared.f32 [r20+2052], f1754;
st.shared.f32 [r20+2160], f1764;
st.shared.f32 [r20+2268], f1774;
st.shared.f32 [r20+2376], f1784;
st.shared.f32 [r20+2484], f1794;
st.shared.f32 [r20+2592], f1804;
st.shared.f32 [r20+2700], f1814;
st.shared.f32 [r20+2808], f1824;
barrier.sync 0;
ld.shared.f32 f1852, [r10];
ld.shared.f32 f1853, [r10+324];
ld.shared.f32 f1854, [r10+648];
ld.shared.f32 f1855, [r10+972];
ld.shared.f32 f1856, [r10+1296];
ld.shared.f32 f1857, [r10+1620];
ld.shared.f32 f1858, [r10+1944];
ld.shared.f32 f1859, [r10+2268];
ld.shared.f32 f1860, [r10+2592];
ld.shared.f32 f1861, [r10+2916];
ld.shared.f32 f1862, [r10+3240];
ld.shared.f32 f1863, [r10+3564];
ld.shared.f32 f1864, [r10+3888];
ld.shared.f32 f1865, [r10+4212];
ld.shared.f32 f1866, [r10+4536];
ld.shared.f32 f1867, [r10+4860];
ld.shared.f32 f1868, [r10+5184];
ld.shared.f32 f1869, [r10+5508];
ld.shared.f32 f1870, [r10+5832];
ld.shared.f32 f1871, [r10+6156];
ld.shared.f32 f1872, [r10+6480];
ld.shared.f32 f1873, [r10+6804];
ld.shared.f32 f1874, [r10+7128];
ld.shared.f32 f1875, [r10+7452];
ld.shared.f32 f1876, [r10+7776];
ld.shared.f32 f1877, [r10+8100];
ld.shared.f32 f1878, [r10+8424];
add.f32 f1879, f1834, f1843;
mul.f32 f1881, f1879, 0f3F000000;
sub.f32 f1882, f1825, f1881;
add.f32 f2046, f1861, f1870;
sub.f32 f1883, f1861, f1870;
mul.f32 f1884, f1883, 0f3F5DB3D7;
mul.f32 f1885, f2046, 0f3F000000;
sub.f32 f1886, f1852, f1885;
sub.f32 f1887, f1834, f1843;
mul.f32 f1888, f1887, 0f3F5DB3D7;
add.f32 f1889, f1835, f1844;
mul.f32 f1891, f1889, 0f3F000000;
sub.f32 f1892, f1826, f1891;
add.f32 f2045, f1862, f1871;
sub.f32 f1893, f1862, f1871;
mul.f32 f1894, f1893, 0f3F5DB3D7;
mul.f32 f1895, f2045, 0f3F000000;
sub.f32 f1896, f1853, f1895;
sub.f32 f1897, f1835, f1844;
mul.f32 f1898, f1897, 0f3F5DB3D7;
add.f32 f1899, f1836, f1845;
mul.f32 f1901, f1899, 0f3F000000;
sub.f32 f1902, f1827, f1901;
add.f32 f2044, f1863, f1872;
sub.f32 f1903, f1863, f1872;
mul.f32 f1904, f1903, 0f3F5DB3D7;
mul.f32 f1905, f2044, 0f3F000000;
sub.f32 f1906, f1854, f1905;
sub.f32 f1907, f1836, f1845;
mul.f32 f1908, f1907, 0f3F5DB3D7;
add.f32 f1909, f1837, f1846;
mul.f32 f1911, f1909, 0f3F000000;
sub.f32 f1912, f1828, f1911;
add.f32 f2043, f1864, f1873;
sub.f32 f1913, f1864, f1873;
mul.f32 f1914, f1913, 0f3F5DB3D7;
mul.f32 f1915, f2043, 0f3F000000;
sub.f32 f1916, f1855, f1915;
sub.f32 f1917, f1837, f1846;
mul.f32 f1918, f1917, 0f3F5DB3D7;
add.f32 f1919, f1838, f1847;
mul.f32 f1921, f1919, 0f3F000000;
sub.f32 f1922, f1829, f1921;
add.f32 f2042, f1865, f1874;
sub.f32 f1923, f1865, f1874;
mul.f32 f1924, f1923, 0f3F5DB3D7;
mul.f32 f1925, f2042, 0f3F000000;
sub.f32 f1926, f1856, f1925;
sub.f32 f1927, f1838, f1847;
mul.f32 f1928, f1927, 0f3F5DB3D7;
add.f32 f1929, f1839, f1848;
mul.f32 f1931, f1929, 0f3F000000;
sub.f32 f1932, f1830, f1931;
add.f32 f2041, f1866, f1875;
sub.f32 f1933, f1866, f1875;
mul.f32 f1934, f1933, 0f3F5DB3D7;
mul.f32 f1935, f2041, 0f3F000000;
sub.f32 f1936, f1857, f1935;
sub.f32 f1937, f1839, f1848;
mul.f32 f1938, f1937, 0f3F5DB3D7;
add.f32 f1939, f1840, f1849;
mul.f32 f1941, f1939, 0f3F000000;
sub.f32 f1942, f1831, f1941;
add.f32 f2040, f1867, f1876;
sub.f32 f1943, f1867, f1876;
mul.f32 f1944, f1943, 0f3F5DB3D7;
mul.f32 f1945, f2040, 0f3F000000;
sub.f32 f1946, f1858, f1945;
sub.f32 f1947, f1840, f1849;
mul.f32 f1948, f1947, 0f3F5DB3D7;
add.f32 f1949, f1841, f1850;
mul.f32 f1951, f1949, 0f3F000000;
sub.f32 f1952, f1832, f1951;
add.f32 f2039, f1868, f1877;
sub.f32 f1953, f1868, f1877;
mul.f32 f1954, f1953, 0f3F5DB3D7;
mul.f32 f1955, f2039, 0f3F000000;
sub.f32 f1956, f1859, f1955;
sub.f32 f1957, f1841, f1850;
mul.f32 f1958, f1957, 0f3F5DB3D7;
add.f32 f1959, f1842, f1851;
mul.f32 f1961, f1959, 0f3F000000;
sub.f32 f1962, f1833, f1961;
add.f32 f2038, f1869, f1878;
sub.f32 f1963, f1869, f1878;
mul.f32 f1964, f1963, 0f3F5DB3D7;
mul.f32 f1965, f2038, 0f3F000000;
sub.f32 f1966, f1860, f1965;
sub.f32 f1967, f1842, f1851;
mul.f32 f2425, f1939, 0f3F000000;
sub.f32 f2424, f1831, f2425;
mul.f32 f1968, f1967, 0f3F5DB3D7;
add.f32 %0, f1825, f1879;
mul.f32 f2427, f2039, 0f3F000000;
sub.f32 f2426, f1859, f2427;
add.f32 %1, f1852, f2046;
mul.f32 f2429, f1939, 0f3F000000;
sub.f32 f2428, f1831, f2429;
mul.f32 f2431, f1929, 0f3F000000;
sub.f32 f2430, f1830, f2431;
add.f32 %2, f1826, f1889;
add.f32 %3, f1853, f2045;
add.f32 %4, f1827, f1899;
add.f32 %5, f1854, f2044;
add.f32 %6, f1828, f1909;
add.f32 %7, f1855, f2043;
add.f32 %8, f1829, f1919;
add.f32 %9, f1856, f2042;
add.f32 %10, f1830, f1929;
add.f32 %11, f1857, f2041;
add.f32 %12, f1831, f1939;
add.f32 %13, f1858, f2040;
add.f32 %14, f1832, f1949;
add.f32 %15, f1859, f2039;
add.f32 %16, f1833, f1959;
add.f32 %17, f1860, f2038;
add.f32 %18, f1884, f1882;
sub.f32 %19, f1886, f1888;
add.f32 %20, f1894, f1892;
sub.f32 %21, f1896, f1898;
add.f32 %22, f1904, f1902;
sub.f32 %23, f1906, f1908;
sub.f32 %25, f1916, f1918;
add.f32 %24, f1914, f1912;
sub.f32 %27, f1926, f1928;
add.f32 %26, f1924, f1922;
sub.f32 %29, f1936, f1938;
add.f32 %28, f1934, f2430;
add.f32 %30, f1944, f2428;
sub.f32 %31, f1946, f1948;
add.f32 %32, f1954, f1952;
sub.f32 %33, f2426, f1958;
add.f32 %34, f1964, f1962;
sub.f32 %35, f1966, f1968;
sub.f32 %36, f1882, f1884;
add.f32 %37, f1888, f1886;
sub.f32 %38, f1892, f1894;
add.f32 %39, f1898, f1896;
sub.f32 %40, f1902, f1904;
add.f32 %41, f1908, f1906;
sub.f32 %42, f1912, f1914;
add.f32 %43, f1918, f1916;
sub.f32 %44, f1922, f1924;
add.f32 %45, f1928, f1926;
sub.f32 %46, f2430, f1934;
add.f32 %47, f1938, f1936;
sub.f32 %48, f2428, f1944;
add.f32 %49, f1948, f1946;
sub.f32 %50, f1952, f1954;
add.f32 %51, f1958, f2426;
sub.f32 %52, f1962, f1964;
add.f32 %53, f1968, f1966;
})"
     : "=f"(rmem[0].x), "=f"(rmem[0].y), "=f"(rmem[1].x), "=f"(rmem[1].y), "=f"(rmem[2].x), "=f"(rmem[2].y), "=f"(rmem[3].x), "=f"(rmem[3].y), "=f"(rmem[4].x), "=f"(rmem[4].y), "=f"(rmem[5].x), "=f"(rmem[5].y), "=f"(rmem[6].x), "=f"(rmem[6].y), "=f"(rmem[7].x), "=f"(rmem[7].y), "=f"(rmem[8].x), "=f"(rmem[8].y), "=f"(rmem[9].x), "=f"(rmem[9].y), "=f"(rmem[10].x), "=f"(rmem[10].y), "=f"(rmem[11].x), "=f"(rmem[11].y), "=f"(rmem[12].x), "=f"(rmem[12].y), "=f"(rmem[13].x), "=f"(rmem[13].y), "=f"(rmem[14].x), "=f"(rmem[14].y), "=f"(rmem[15].x), "=f"(rmem[15].y), "=f"(rmem[16].x), "=f"(rmem[16].y), "=f"(rmem[17].x), "=f"(rmem[17].y), "=f"(rmem[18].x), "=f"(rmem[18].y), "=f"(rmem[19].x), "=f"(rmem[19].y), "=f"(rmem[20].x), "=f"(rmem[20].y), "=f"(rmem[21].x), "=f"(rmem[21].y), "=f"(rmem[22].x), "=f"(rmem[22].y), "=f"(rmem[23].x), "=f"(rmem[23].y), "=f"(rmem[24].x), "=f"(rmem[24].y), "=f"(rmem[25].x), "=f"(rmem[25].y), "=f"(rmem[26].x), "=f"(rmem[26].y): "r"(smem), "l"(lut_sp_27_2187), "l"(lut_sp_27_81), "f"(rmem[0].x), "f"(rmem[0].y), "f"(rmem[1].x), "f"(rmem[1].y), "f"(rmem[2].x), "f"(rmem[2].y), "f"(rmem[3].x), "f"(rmem[3].y), "f"(rmem[4].x), "f"(rmem[4].y), "f"(rmem[5].x), "f"(rmem[5].y), "f"(rmem[6].x), "f"(rmem[6].y), "f"(rmem[7].x), "f"(rmem[7].y), "f"(rmem[8].x), "f"(rmem[8].y), "f"(rmem[9].x), "f"(rmem[9].y), "f"(rmem[10].x), "f"(rmem[10].y), "f"(rmem[11].x), "f"(rmem[11].y), "f"(rmem[12].x), "f"(rmem[12].y), "f"(rmem[13].x), "f"(rmem[13].y), "f"(rmem[14].x), "f"(rmem[14].y), "f"(rmem[15].x), "f"(rmem[15].y), "f"(rmem[16].x), "f"(rmem[16].y), "f"(rmem[17].x), "f"(rmem[17].y), "f"(rmem[18].x), "f"(rmem[18].y), "f"(rmem[19].x), "f"(rmem[19].y), "f"(rmem[20].x), "f"(rmem[20].y), "f"(rmem[21].x), "f"(rmem[21].y), "f"(rmem[22].x), "f"(rmem[22].y), "f"(rmem[23].x), "f"(rmem[23].y), "f"(rmem[24].x), "f"(rmem[24].y), "f"(rmem[25].x), "f"(rmem[25].y), "f"(rmem[26].x), "f"(rmem[26].y), "f"(rmem[10].y), "f"(rmem[19].y), "f"(rmem[1].y), "f"(rmem[22].y), "f"(rmem[13].y), 
"f"(rmem[4].y), "f"(rmem[16].y), "f"(rmem[25].y), "f"(rmem[7].y), "f"(rmem[11].y), "f"(rmem[20].y), "f"(rmem[2].y), "f"(rmem[23].y), "f"(rmem[14].y), "f"(rmem[5].y), "f"(rmem[17].y), "f"(rmem[8].y));
};




// cufftdx_private_function<148, float, 1>
//
// Purpose: one inline-PTX block that chains seven 3-point butterfly passes
// (3^7 = 2187, matching the FFT_2187 / FP32 / FWD include guard of this
// header) over three complex<float> values held by each thread, exchanging
// data between passes through shared memory as packed (re, im) pairs.
//
// Parameters:
//   rmem - per-thread complex<float> array. Elements 0..2 are read as inputs
//          and overwritten with the results (asm outputs %0-%5).
//   smem - 32-bit base address used by every st.shared / ld.shared below.
//
// Operand map (constraint list that follows the PTX string):
//   %0-%5    outputs: rmem[0].x, rmem[0].y, rmem[1].x, rmem[1].y,
//            rmem[2].x, rmem[2].y
//   %6       smem
//   %7-%12   lut_sp_3_2187, lut_sp_3_729, lut_sp_3_243, lut_sp_3_81,
//            lut_sp_3_27, lut_sp_3_9. Each is read with ld.global.v2.f32 at
//            an 8-byte stride. The tables are declared outside this chunk;
//            presumably they hold twiddle factors - confirm at definition.
//   %13-%19  inputs: rmem[0].x, rmem[0].y, rmem[1].x, rmem[1].y, rmem[1].y,
//            rmem[2].x, rmem[2].y. rmem[1].y is bound twice; the PTX reads
//            %17 only and never references %16.
//
// Shape of each of the first six passes:
//   1. 3-point butterfly built from the constants 0f3F000000 (0.5) and
//      0f3F5DB3D7 (~0.8660254).
//   2. Load w = (re, im) from that pass's table, form w*w in registers
//      (re*re - im*im, 2*re*im), then complex-multiply butterfly output 1
//      by w and output 2 by w*w. Output 0 is stored unscaled.
//   3. barrier.sync 0; three st.shared.v2.f32; barrier.sync 0; three
//      ld.shared.v2.f32 from [r11], [r11+5832], [r11+11664].
// The seventh butterfly has no table lookup and writes directly to %0-%5.
//
// Addressing (all byte offsets):
//   r5  = tid.x / 729 and r7 = tid.x - r5*729; every division here is in
//         reciprocal-multiply form (mul.wide.u32 by a magic constant + shift).
//   r8  = smem + tid.y*17496 + r5*17496      (17496 = 2187 * 8)
//   r11 = r8 + r7*8                          (load base, same for all passes)
//   Table index per pass: r7, r7/3, r7/9, r7/27, r7/81, r7/243.
//   Store base per pass:  r8 + (r7 / 3^k)*(3^(k+1)*8) + (r7 % 3^k)*8 for
//   k = 0..5, with the three stores spaced 3^k * 8 bytes apart.
//
// Synchronisation: the block contains 12 barrier.sync 0 instructions, none of
// them predicated. NOTE(review): the required block dimensions and the
// shared-memory size are not visible in this chunk - presumably every thread
// that takes part in barrier 0 must execute this function; confirm against
// the caller.
template<> __forceinline__ __device__ void cufftdx_private_function<148, float, 1>(cufftdx::detail::complex<float> *rmem, unsigned smem){

asm volatile (R"({
.reg .f32 f<311>;
.reg .b32 r<46>;
.reg .b64 rd<32>;
mov.u32 r1, %tid.y;
mov.u32 r2, %6;
mad.lo.s32 r3, r1, 17496, r2;
mov.u32 r4, %tid.x;
add.f32 f13, %15, %18;
add.f32 f14, %17, %19;
mul.f32 f15, f13, 0f3F000000;
sub.f32 f16, %13, f15;
sub.f32 f17, %17, %19;
mul.f32 f18, f17, 0f3F5DB3D7;
add.f32 f19, f18, f16;
sub.f32 f20, f16, f18;
mul.f32 f21, f14, 0f3F000000;
sub.f32 f22, %14, f21;
sub.f32 f23, %15, %18;
mul.f32 f24, f23, 0f3F5DB3D7;
sub.f32 f25, f22, f24;
add.f32 f26, f24, f22;
mul.wide.u32 rd2, r4, 1508246403;
shr.u64 rd3, rd2, 40;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 729;
sub.s32 r7, r4, r6;
mad.lo.s32 r8, r5, 17496, r3;
mul.wide.u32 rd4, r7, 8;
mov.u64 rd5, %7;
add.s64 rd6, rd5, rd4;
ld.global.v2.f32 {f27, f28}, [rd6];
mul.f32 f31, f27, f19;
mul.f32 f32, f28, f25;
mul.f32 f33, f27, f25;
mul.f32 f34, f27, f27;
mul.f32 f35, f28, f28;
sub.f32 f36, f34, f35;
mul.f32 f37, f28, f27;
fma.rn.f32 f38, f28, f27, f37;
mul.f32 f39, f36, f20;
mul.f32 f40, f38, f26;
mul.f32 f41, f36, f26;
barrier.sync 0;
mad.lo.s32 r9, r7, 24, r8;
add.f32 f42, %14, f14;
add.f32 f43, %13, f13;
st.shared.v2.f32 [r9], {f43, f42};
fma.rn.f32 f44, f28, f19, f33;
sub.f32 f45, f31, f32;
st.shared.v2.f32 [r9+8], {f45, f44};
sub.f32 f46, f39, f40;
fma.rn.f32 f47, f38, f20, f41;
st.shared.v2.f32 [r9+16], {f46, f47};
barrier.sync 0;
shl.b32 r10, r7, 4;
sub.s32 r11, r9, r10;
ld.shared.v2.f32 {f48, f49}, [r11];
ld.shared.v2.f32 {f52, f53}, [r11+5832];
ld.shared.v2.f32 {f56, f57}, [r11+11664];
add.f32 f60, f52, f56;
add.f32 f61, f53, f57;
mul.f32 f62, f60, 0f3F000000;
sub.f32 f63, f48, f62;
sub.f32 f64, f53, f57;
mul.f32 f65, f64, 0f3F5DB3D7;
add.f32 f66, f65, f63;
sub.f32 f67, f63, f65;
mul.f32 f68, f61, 0f3F000000;
sub.f32 f69, f49, f68;
sub.f32 f70, f52, f56;
mul.f32 f71, f70, 0f3F5DB3D7;
sub.f32 f72, f69, f71;
add.f32 f73, f71, f69;
mul.wide.u32 rd7, r7, -1431655765;
shr.u64 rd8, rd7, 33;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 3;
sub.s32 r14, r7, r13;
shl.b32 r15, r14, 3;
add.s32 r16, r8, r15;
mul.wide.u32 rd9, r12, 8;
mov.u64 rd10, %8;
add.s64 rd11, rd10, rd9;
ld.global.v2.f32 {f74, f75}, [rd11];
mul.f32 f78, f74, f66;
mul.f32 f79, f75, f72;
mul.f32 f80, f74, f72;
mul.f32 f81, f74, f74;
mul.f32 f82, f75, f75;
sub.f32 f83, f81, f82;
mul.f32 f84, f75, f74;
fma.rn.f32 f85, f75, f74, f84;
mul.f32 f86, f83, f67;
mul.f32 f87, f85, f73;
mul.f32 f88, f83, f73;
barrier.sync 0;
mad.lo.s32 r17, r12, 72, r16;
add.f32 f89, f49, f61;
add.f32 f90, f48, f60;
st.shared.v2.f32 [r17], {f90, f89};
fma.rn.f32 f91, f75, f66, f80;
sub.f32 f92, f78, f79;
st.shared.v2.f32 [r17+24], {f92, f91};
fma.rn.f32 f93, f85, f67, f88;
sub.f32 f94, f86, f87;
st.shared.v2.f32 [r17+48], {f94, f93};
barrier.sync 0;
ld.shared.v2.f32 {f95, f96}, [r11];
ld.shared.v2.f32 {f99, f100}, [r11+5832];
ld.shared.v2.f32 {f103, f104}, [r11+11664];
add.f32 f107, f99, f103;
add.f32 f108, f100, f104;
mul.f32 f109, f107, 0f3F000000;
sub.f32 f110, f95, f109;
sub.f32 f111, f100, f104;
mul.f32 f112, f111, 0f3F5DB3D7;
add.f32 f113, f112, f110;
sub.f32 f114, f110, f112;
mul.f32 f115, f108, 0f3F000000;
sub.f32 f116, f96, f115;
sub.f32 f117, f99, f103;
mul.f32 f118, f117, 0f3F5DB3D7;
sub.f32 f119, f116, f118;
add.f32 f120, f118, f116;
mul.wide.u32 rd12, r7, 954437177;
shr.u64 rd13, rd12, 33;
cvt.u32.u64 r18, rd13;
mul.lo.s32 r19, r18, 9;
sub.s32 r20, r7, r19;
shl.b32 r21, r20, 3;
add.s32 r22, r8, r21;
mul.wide.u32 rd14, r18, 8;
mov.u64 rd15, %9;
add.s64 rd16, rd15, rd14;
ld.global.v2.f32 {f121, f122}, [rd16];
mul.f32 f125, f121, f113;
mul.f32 f126, f122, f119;
mul.f32 f127, f121, f119;
mul.f32 f128, f121, f121;
mul.f32 f129, f122, f122;
sub.f32 f130, f128, f129;
mul.f32 f131, f122, f121;
fma.rn.f32 f132, f122, f121, f131;
mul.f32 f133, f130, f114;
mul.f32 f134, f132, f120;
mul.f32 f135, f130, f120;
barrier.sync 0;
mad.lo.s32 r23, r18, 216, r22;
add.f32 f136, f96, f108;
add.f32 f137, f95, f107;
st.shared.v2.f32 [r23], {f137, f136};
fma.rn.f32 f138, f122, f113, f127;
sub.f32 f139, f125, f126;
st.shared.v2.f32 [r23+72], {f139, f138};
fma.rn.f32 f140, f132, f114, f135;
sub.f32 f141, f133, f134;
st.shared.v2.f32 [r23+144], {f141, f140};
barrier.sync 0;
ld.shared.v2.f32 {f142, f143}, [r11];
ld.shared.v2.f32 {f146, f147}, [r11+5832];
ld.shared.v2.f32 {f150, f151}, [r11+11664];
add.f32 f154, f146, f150;
add.f32 f155, f147, f151;
mul.f32 f156, f154, 0f3F000000;
sub.f32 f157, f142, f156;
sub.f32 f158, f147, f151;
mul.f32 f159, f158, 0f3F5DB3D7;
add.f32 f160, f159, f157;
sub.f32 f161, f157, f159;
mul.f32 f162, f155, 0f3F000000;
sub.f32 f163, f143, f162;
sub.f32 f164, f146, f150;
mul.f32 f165, f164, 0f3F5DB3D7;
sub.f32 f166, f163, f165;
add.f32 f167, f165, f163;
mul.wide.u32 rd17, r7, 795364315;
shr.u64 rd18, rd17, 32;
cvt.u32.u64 r24, rd18;
sub.s32 r25, r7, r24;
shr.u32 r26, r25, 1;
add.s32 r27, r26, r24;
shr.u32 r28, r27, 4;
mul.lo.s32 r29, r28, 27;
sub.s32 r30, r7, r29;
shl.b32 r31, r30, 3;
add.s32 r32, r8, r31;
mul.wide.u32 rd19, r28, 8;
mov.u64 rd20, %10;
add.s64 rd21, rd20, rd19;
ld.global.v2.f32 {f168, f169}, [rd21];
mul.f32 f172, f168, f160;
mul.f32 f173, f169, f166;
mul.f32 f174, f168, f166;
mul.f32 f175, f168, f168;
mul.f32 f176, f169, f169;
sub.f32 f177, f175, f176;
mul.f32 f178, f169, f168;
fma.rn.f32 f179, f169, f168, f178;
mul.f32 f180, f177, f161;
mul.f32 f181, f179, f167;
mul.f32 f182, f177, f167;
barrier.sync 0;
mad.lo.s32 r33, r28, 648, r32;
add.f32 f183, f143, f155;
add.f32 f184, f142, f154;
st.shared.v2.f32 [r33], {f184, f183};
fma.rn.f32 f185, f169, f160, f174;
sub.f32 f186, f172, f173;
st.shared.v2.f32 [r33+216], {f186, f185};
fma.rn.f32 f187, f179, f161, f182;
sub.f32 f188, f180, f181;
st.shared.v2.f32 [r33+432], {f188, f187};
barrier.sync 0;
ld.shared.v2.f32 {f189, f190}, [r11];
ld.shared.v2.f32 {f193, f194}, [r11+5832];
ld.shared.v2.f32 {f197, f198}, [r11+11664];
add.f32 f201, f193, f197;
add.f32 f202, f194, f198;
mul.f32 f203, f201, 0f3F000000;
sub.f32 f204, f189, f203;
sub.f32 f205, f194, f198;
mul.f32 f206, f205, 0f3F5DB3D7;
add.f32 f207, f206, f204;
sub.f32 f208, f204, f206;
mul.f32 f209, f202, 0f3F000000;
sub.f32 f210, f190, f209;
sub.f32 f211, f193, f197;
mul.f32 f212, f211, 0f3F5DB3D7;
sub.f32 f213, f210, f212;
add.f32 f214, f212, f210;
mul.wide.u32 rd22, r7, -901412889;
shr.u64 rd23, rd22, 38;
cvt.u32.u64 r34, rd23;
mul.lo.s32 r35, r34, 81;
sub.s32 r36, r7, r35;
shl.b32 r37, r36, 3;
add.s32 r38, r8, r37;
mul.wide.u32 rd24, r34, 8;
mov.u64 rd25, %11;
add.s64 rd26, rd25, rd24;
ld.global.v2.f32 {f215, f216}, [rd26];
mul.f32 f219, f215, f207;
mul.f32 f220, f216, f213;
mul.f32 f221, f215, f213;
mul.f32 f222, f215, f215;
mul.f32 f223, f216, f216;
sub.f32 f224, f222, f223;
mul.f32 f225, f216, f215;
fma.rn.f32 f226, f216, f215, f225;
mul.f32 f227, f224, f208;
mul.f32 f228, f226, f214;
mul.f32 f229, f224, f214;
barrier.sync 0;
mad.lo.s32 r39, r34, 1944, r38;
add.f32 f230, f190, f202;
add.f32 f231, f189, f201;
st.shared.v2.f32 [r39], {f231, f230};
fma.rn.f32 f232, f216, f207, f221;
sub.f32 f233, f219, f220;
st.shared.v2.f32 [r39+648], {f233, f232};
fma.rn.f32 f234, f226, f208, f229;
sub.f32 f235, f227, f228;
st.shared.v2.f32 [r39+1296], {f235, f234};
barrier.sync 0;
ld.shared.v2.f32 {f236, f237}, [r11];
ld.shared.v2.f32 {f240, f241}, [r11+5832];
ld.shared.v2.f32 {f244, f245}, [r11+11664];
add.f32 f248, f240, f244;
add.f32 f249, f241, f245;
mul.f32 f250, f248, 0f3F000000;
sub.f32 f251, f236, f250;
sub.f32 f252, f241, f245;
mul.f32 f253, f252, 0f3F5DB3D7;
add.f32 f254, f253, f251;
sub.f32 f255, f251, f253;
mul.f32 f256, f249, 0f3F000000;
sub.f32 f257, f237, f256;
sub.f32 f258, f240, f244;
mul.f32 f259, f258, 0f3F5DB3D7;
sub.f32 f260, f257, f259;
add.f32 f261, f259, f257;
mul.wide.u32 rd27, r7, -2032597691;
shr.u64 rd28, rd27, 39;
cvt.u32.u64 r40, rd28;
mul.lo.s32 r41, r40, 243;
sub.s32 r42, r7, r41;
shl.b32 r43, r42, 3;
add.s32 r44, r8, r43;
mul.wide.u32 rd29, r40, 8;
mov.u64 rd30, %12;
add.s64 rd31, rd30, rd29;
ld.global.v2.f32 {f262, f263}, [rd31];
mul.f32 f266, f262, f254;
mul.f32 f267, f263, f260;
mul.f32 f268, f262, f260;
mul.f32 f269, f262, f262;
mul.f32 f270, f263, f263;
sub.f32 f271, f269, f270;
mul.f32 f272, f263, f262;
fma.rn.f32 f273, f263, f262, f272;
mul.f32 f274, f271, f255;
mul.f32 f275, f273, f261;
mul.f32 f276, f271, f261;
barrier.sync 0;
mad.lo.s32 r45, r40, 5832, r44;
add.f32 f277, f237, f249;
add.f32 f278, f236, f248;
st.shared.v2.f32 [r45], {f278, f277};
fma.rn.f32 f279, f263, f254, f268;
sub.f32 f280, f266, f267;
st.shared.v2.f32 [r45+1944], {f280, f279};
fma.rn.f32 f281, f273, f255, f276;
sub.f32 f282, f274, f275;
st.shared.v2.f32 [r45+3888], {f282, f281};
barrier.sync 0;
ld.shared.v2.f32 {f283, f284}, [r11];
ld.shared.v2.f32 {f287, f288}, [r11+5832];
ld.shared.v2.f32 {f291, f292}, [r11+11664];
add.f32 f295, f287, f291;
add.f32 f296, f288, f292;
mul.f32 f297, f295, 0f3F000000;
sub.f32 f298, f283, f297;
sub.f32 f299, f288, f292;
mul.f32 f300, f299, 0f3F5DB3D7;
mul.f32 f301, f296, 0f3F000000;
sub.f32 f302, f284, f301;
sub.f32 f303, f287, f291;
mul.f32 f304, f303, 0f3F5DB3D7;
add.f32 %1, f284, f296;
add.f32 %0, f283, f295;
sub.f32 %3, f302, f304;
add.f32 %2, f300, f298;
add.f32 %5, f304, f302;
sub.f32 %4, f298, f300;
})"
     : "=f"(rmem[0].x), "=f"(rmem[0].y), "=f"(rmem[1].x), "=f"(rmem[1].y), "=f"(rmem[2].x), "=f"(rmem[2].y): "r"(smem), "l"(lut_sp_3_2187), "l"(lut_sp_3_729), "l"(lut_sp_3_243), "l"(lut_sp_3_81), "l"(lut_sp_3_27), "l"(lut_sp_3_9), "f"(rmem[0].x), "f"(rmem[0].y), "f"(rmem[1].x), "f"(rmem[1].y), "f"(rmem[1].y), "f"(rmem[2].x), "f"(rmem[2].y));
};




// cufftdx_private_function<149, float, 1>
//
// Purpose: one inline-PTX block that chains seven 3-point butterfly passes
// (3^7 = 2187, matching the FFT_2187 / FP32 / FWD include guard of this
// header) over three complex<float> values held by each thread. Between
// passes the real parts and the imaginary parts are exchanged through shared
// memory in two separate scalar (f32) rounds that reuse the same addresses.
//
// Parameters:
//   rmem - per-thread complex<float> array. Elements 0..2 are read as inputs
//          and overwritten with the results (asm outputs %0-%5).
//   smem - 32-bit base address used by every st.shared / ld.shared below.
//
// Operand map (constraint list that follows the PTX string):
//   %0-%5    outputs: rmem[0].x, rmem[0].y, rmem[1].x, rmem[1].y,
//            rmem[2].x, rmem[2].y
//   %6       smem
//   %7-%12   lut_sp_3_2187, lut_sp_3_729, lut_sp_3_243, lut_sp_3_81,
//            lut_sp_3_27, lut_sp_3_9. Each is read with ld.global.v2.f32 at
//            an 8-byte stride. The tables are declared outside this chunk;
//            presumably they hold twiddle factors - confirm at definition.
//   %13-%19  inputs: rmem[0].x, rmem[0].y, rmem[1].x, rmem[1].y, rmem[1].y,
//            rmem[2].x, rmem[2].y. rmem[1].y is bound twice; the PTX reads
//            %17 only and never references %16.
//
// Shape of each of the first six passes:
//   1. 3-point butterfly built from the constants 0f3F000000 (0.5) and
//      0f3F5DB3D7 (~0.8660254).
//   2. Load w = (re, im) from that pass's table, form w*w in registers
//      (re*re - im*im, 2*re*im), then complex-multiply butterfly output 1
//      by w and output 2 by w*w. Output 0 is stored unscaled.
//   3. barrier.sync 0; store the three real parts; barrier.sync 0; load
//      three floats from [r11], [r11+2916], [r11+5832]; barrier.sync 0;
//      store the three imaginary parts to the same addresses;
//      barrier.sync 0; load three floats from the same offsets.
// The seventh butterfly has no table lookup and writes directly to %0-%5.
//
// Addressing (all byte offsets):
//   r5  = tid.x / 729 and r7 = tid.x - r5*729; every division here is in
//         reciprocal-multiply form (mul.wide.u32 by a magic constant + shift).
//   r8  = smem + tid.y*8748 + r5*8748        (8748 = 2187 * 4)
//   r11 = r8 + r7*4                          (load base, same for all passes)
//   Table index per pass: r7, r7/3, r7/9, r7/27, r7/81, r7/243.
//   Store base per pass:  r8 + (r7 / 3^k)*(3^(k+1)*4) + (r7 % 3^k)*4 for
//   k = 0..5, with the three stores spaced 3^k * 4 bytes apart.
//
// Synchronisation: the block contains 24 barrier.sync 0 instructions, none of
// them predicated. NOTE(review): the required block dimensions and the
// shared-memory size are not visible in this chunk - presumably every thread
// that takes part in barrier 0 must execute this function; confirm against
// the caller.
template<> __forceinline__ __device__ void cufftdx_private_function<149, float, 1>(cufftdx::detail::complex<float> *rmem, unsigned smem){

asm volatile (R"({
.reg .f32 f<275>;
.reg .b32 r<46>;
.reg .b64 rd<32>;
mov.u32 r1, %tid.y;
mov.u32 r2, %6;
mad.lo.s32 r3, r1, 8748, r2;
mov.u32 r4, %tid.x;
add.f32 f13, %15, %18;
add.f32 f14, %13, f13;
add.f32 f15, %17, %19;
add.f32 f16, %14, f15;
mul.f32 f17, f13, 0f3F000000;
sub.f32 f18, %13, f17;
sub.f32 f19, %17, %19;
mul.f32 f20, f19, 0f3F5DB3D7;
add.f32 f21, f20, f18;
sub.f32 f22, f18, f20;
mul.f32 f23, f15, 0f3F000000;
sub.f32 f24, %14, f23;
sub.f32 f25, %15, %18;
mul.f32 f26, f25, 0f3F5DB3D7;
sub.f32 f27, f24, f26;
add.f32 f28, f26, f24;
mul.wide.u32 rd2, r4, 1508246403;
shr.u64 rd3, rd2, 40;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 729;
sub.s32 r7, r4, r6;
mad.lo.s32 r8, r5, 8748, r3;
mul.wide.u32 rd4, r7, 8;
mov.u64 rd5, %7;
add.s64 rd6, rd5, rd4;
ld.global.v2.f32 {f29, f30}, [rd6];
mul.f32 f33, f29, f21;
mul.f32 f34, f30, f27;
sub.f32 f35, f33, f34;
mul.f32 f36, f29, f27;
fma.rn.f32 f37, f30, f21, f36;
mul.f32 f38, f29, f29;
mul.f32 f39, f30, f30;
sub.f32 f40, f38, f39;
mul.f32 f41, f30, f29;
fma.rn.f32 f42, f30, f29, f41;
mul.f32 f43, f40, f22;
mul.f32 f44, f42, f28;
sub.f32 f45, f43, f44;
mul.f32 f46, f40, f28;
fma.rn.f32 f47, f42, f22, f46;
barrier.sync 0;
mad.lo.s32 r9, r7, 12, r8;
st.shared.f32 [r9], f14;
st.shared.f32 [r9+4], f35;
st.shared.f32 [r9+8], f45;
barrier.sync 0;
shl.b32 r10, r7, 3;
sub.s32 r11, r9, r10;
ld.shared.f32 f48, [r11];
ld.shared.f32 f49, [r11+2916];
ld.shared.f32 f50, [r11+5832];
barrier.sync 0;
st.shared.f32 [r9], f16;
st.shared.f32 [r9+4], f37;
st.shared.f32 [r9+8], f47;
barrier.sync 0;
ld.shared.f32 f51, [r11];
ld.shared.f32 f52, [r11+2916];
ld.shared.f32 f53, [r11+5832];
add.f32 f54, f49, f50;
add.f32 f55, f48, f54;
add.f32 f56, f52, f53;
add.f32 f57, f51, f56;
mul.f32 f58, f54, 0f3F000000;
sub.f32 f59, f48, f58;
sub.f32 f60, f52, f53;
mul.f32 f61, f60, 0f3F5DB3D7;
add.f32 f62, f61, f59;
sub.f32 f63, f59, f61;
mul.f32 f64, f56, 0f3F000000;
sub.f32 f65, f51, f64;
sub.f32 f66, f49, f50;
mul.f32 f67, f66, 0f3F5DB3D7;
sub.f32 f68, f65, f67;
add.f32 f69, f67, f65;
mul.wide.u32 rd7, r7, -1431655765;
shr.u64 rd8, rd7, 33;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 3;
sub.s32 r14, r7, r13;
shl.b32 r15, r14, 2;
add.s32 r16, r8, r15;
mul.wide.u32 rd9, r12, 8;
mov.u64 rd10, %8;
add.s64 rd11, rd10, rd9;
ld.global.v2.f32 {f70, f71}, [rd11];
mul.f32 f74, f70, f62;
mul.f32 f75, f71, f68;
sub.f32 f76, f74, f75;
mul.f32 f77, f70, f68;
fma.rn.f32 f78, f71, f62, f77;
mul.f32 f79, f70, f70;
mul.f32 f80, f71, f71;
sub.f32 f81, f79, f80;
mul.f32 f82, f71, f70;
fma.rn.f32 f83, f71, f70, f82;
mul.f32 f84, f81, f63;
mul.f32 f85, f83, f69;
sub.f32 f86, f84, f85;
mul.f32 f87, f81, f69;
fma.rn.f32 f88, f83, f63, f87;
barrier.sync 0;
mad.lo.s32 r17, r12, 36, r16;
st.shared.f32 [r17], f55;
st.shared.f32 [r17+12], f76;
st.shared.f32 [r17+24], f86;
barrier.sync 0;
ld.shared.f32 f89, [r11];
ld.shared.f32 f90, [r11+2916];
ld.shared.f32 f91, [r11+5832];
barrier.sync 0;
st.shared.f32 [r17], f57;
st.shared.f32 [r17+12], f78;
st.shared.f32 [r17+24], f88;
barrier.sync 0;
ld.shared.f32 f92, [r11];
ld.shared.f32 f93, [r11+2916];
ld.shared.f32 f94, [r11+5832];
add.f32 f95, f90, f91;
add.f32 f96, f89, f95;
add.f32 f97, f93, f94;
add.f32 f98, f92, f97;
mul.f32 f99, f95, 0f3F000000;
sub.f32 f100, f89, f99;
sub.f32 f101, f93, f94;
mul.f32 f102, f101, 0f3F5DB3D7;
add.f32 f103, f102, f100;
sub.f32 f104, f100, f102;
mul.f32 f105, f97, 0f3F000000;
sub.f32 f106, f92, f105;
sub.f32 f107, f90, f91;
mul.f32 f108, f107, 0f3F5DB3D7;
sub.f32 f109, f106, f108;
add.f32 f110, f108, f106;
mul.wide.u32 rd12, r7, 954437177;
shr.u64 rd13, rd12, 33;
cvt.u32.u64 r18, rd13;
mul.lo.s32 r19, r18, 9;
sub.s32 r20, r7, r19;
shl.b32 r21, r20, 2;
add.s32 r22, r8, r21;
mul.wide.u32 rd14, r18, 8;
mov.u64 rd15, %9;
add.s64 rd16, rd15, rd14;
ld.global.v2.f32 {f111, f112}, [rd16];
mul.f32 f115, f111, f103;
mul.f32 f116, f112, f109;
sub.f32 f117, f115, f116;
mul.f32 f118, f111, f109;
fma.rn.f32 f119, f112, f103, f118;
mul.f32 f120, f111, f111;
mul.f32 f121, f112, f112;
sub.f32 f122, f120, f121;
mul.f32 f123, f112, f111;
fma.rn.f32 f124, f112, f111, f123;
mul.f32 f125, f122, f104;
mul.f32 f126, f124, f110;
sub.f32 f127, f125, f126;
mul.f32 f128, f122, f110;
fma.rn.f32 f129, f124, f104, f128;
barrier.sync 0;
mad.lo.s32 r23, r18, 108, r22;
st.shared.f32 [r23], f96;
st.shared.f32 [r23+36], f117;
st.shared.f32 [r23+72], f127;
barrier.sync 0;
ld.shared.f32 f130, [r11];
ld.shared.f32 f131, [r11+2916];
ld.shared.f32 f132, [r11+5832];
barrier.sync 0;
st.shared.f32 [r23], f98;
st.shared.f32 [r23+36], f119;
st.shared.f32 [r23+72], f129;
barrier.sync 0;
ld.shared.f32 f133, [r11];
ld.shared.f32 f134, [r11+2916];
ld.shared.f32 f135, [r11+5832];
add.f32 f136, f131, f132;
add.f32 f137, f130, f136;
add.f32 f138, f134, f135;
add.f32 f139, f133, f138;
mul.f32 f140, f136, 0f3F000000;
sub.f32 f141, f130, f140;
sub.f32 f142, f134, f135;
mul.f32 f143, f142, 0f3F5DB3D7;
add.f32 f144, f143, f141;
sub.f32 f145, f141, f143;
mul.f32 f146, f138, 0f3F000000;
sub.f32 f147, f133, f146;
sub.f32 f148, f131, f132;
mul.f32 f149, f148, 0f3F5DB3D7;
sub.f32 f150, f147, f149;
add.f32 f151, f149, f147;
mul.wide.u32 rd17, r7, 795364315;
shr.u64 rd18, rd17, 32;
cvt.u32.u64 r24, rd18;
sub.s32 r25, r7, r24;
shr.u32 r26, r25, 1;
add.s32 r27, r26, r24;
shr.u32 r28, r27, 4;
mul.lo.s32 r29, r28, 27;
sub.s32 r30, r7, r29;
shl.b32 r31, r30, 2;
add.s32 r32, r8, r31;
mul.wide.u32 rd19, r28, 8;
mov.u64 rd20, %10;
add.s64 rd21, rd20, rd19;
ld.global.v2.f32 {f152, f153}, [rd21];
mul.f32 f156, f152, f144;
mul.f32 f157, f153, f150;
sub.f32 f158, f156, f157;
mul.f32 f159, f152, f150;
fma.rn.f32 f160, f153, f144, f159;
mul.f32 f161, f152, f152;
mul.f32 f162, f153, f153;
sub.f32 f163, f161, f162;
mul.f32 f164, f153, f152;
fma.rn.f32 f165, f153, f152, f164;
mul.f32 f166, f163, f145;
mul.f32 f167, f165, f151;
sub.f32 f168, f166, f167;
mul.f32 f169, f163, f151;
fma.rn.f32 f170, f165, f145, f169;
barrier.sync 0;
mad.lo.s32 r33, r28, 324, r32;
st.shared.f32 [r33], f137;
st.shared.f32 [r33+108], f158;
st.shared.f32 [r33+216], f168;
barrier.sync 0;
ld.shared.f32 f171, [r11];
ld.shared.f32 f172, [r11+2916];
ld.shared.f32 f173, [r11+5832];
barrier.sync 0;
st.shared.f32 [r33], f139;
st.shared.f32 [r33+108], f160;
st.shared.f32 [r33+216], f170;
barrier.sync 0;
ld.shared.f32 f174, [r11];
ld.shared.f32 f175, [r11+2916];
ld.shared.f32 f176, [r11+5832];
add.f32 f177, f172, f173;
add.f32 f178, f171, f177;
add.f32 f179, f175, f176;
add.f32 f180, f174, f179;
mul.f32 f181, f177, 0f3F000000;
sub.f32 f182, f171, f181;
sub.f32 f183, f175, f176;
mul.f32 f184, f183, 0f3F5DB3D7;
add.f32 f185, f184, f182;
sub.f32 f186, f182, f184;
mul.f32 f187, f179, 0f3F000000;
sub.f32 f188, f174, f187;
sub.f32 f189, f172, f173;
mul.f32 f190, f189, 0f3F5DB3D7;
sub.f32 f191, f188, f190;
add.f32 f192, f190, f188;
mul.wide.u32 rd22, r7, -901412889;
shr.u64 rd23, rd22, 38;
cvt.u32.u64 r34, rd23;
mul.lo.s32 r35, r34, 81;
sub.s32 r36, r7, r35;
shl.b32 r37, r36, 2;
add.s32 r38, r8, r37;
mul.wide.u32 rd24, r34, 8;
mov.u64 rd25, %11;
add.s64 rd26, rd25, rd24;
ld.global.v2.f32 {f193, f194}, [rd26];
mul.f32 f197, f193, f185;
mul.f32 f198, f194, f191;
sub.f32 f199, f197, f198;
mul.f32 f200, f193, f191;
fma.rn.f32 f201, f194, f185, f200;
mul.f32 f202, f193, f193;
mul.f32 f203, f194, f194;
sub.f32 f204, f202, f203;
mul.f32 f205, f194, f193;
fma.rn.f32 f206, f194, f193, f205;
mul.f32 f207, f204, f186;
mul.f32 f208, f206, f192;
sub.f32 f209, f207, f208;
mul.f32 f210, f204, f192;
fma.rn.f32 f211, f206, f186, f210;
barrier.sync 0;
mad.lo.s32 r39, r34, 972, r38;
st.shared.f32 [r39], f178;
st.shared.f32 [r39+324], f199;
st.shared.f32 [r39+648], f209;
barrier.sync 0;
ld.shared.f32 f212, [r11];
ld.shared.f32 f213, [r11+2916];
ld.shared.f32 f214, [r11+5832];
barrier.sync 0;
st.shared.f32 [r39], f180;
st.shared.f32 [r39+324], f201;
st.shared.f32 [r39+648], f211;
barrier.sync 0;
ld.shared.f32 f215, [r11];
ld.shared.f32 f216, [r11+2916];
ld.shared.f32 f217, [r11+5832];
add.f32 f218, f213, f214;
add.f32 f219, f212, f218;
add.f32 f220, f216, f217;
add.f32 f221, f215, f220;
mul.f32 f222, f218, 0f3F000000;
sub.f32 f223, f212, f222;
sub.f32 f224, f216, f217;
mul.f32 f225, f224, 0f3F5DB3D7;
add.f32 f226, f225, f223;
sub.f32 f227, f223, f225;
mul.f32 f228, f220, 0f3F000000;
sub.f32 f229, f215, f228;
sub.f32 f230, f213, f214;
mul.f32 f231, f230, 0f3F5DB3D7;
sub.f32 f232, f229, f231;
add.f32 f233, f231, f229;
mul.wide.u32 rd27, r7, -2032597691;
shr.u64 rd28, rd27, 39;
cvt.u32.u64 r40, rd28;
mul.lo.s32 r41, r40, 243;
sub.s32 r42, r7, r41;
shl.b32 r43, r42, 2;
add.s32 r44, r8, r43;
mul.wide.u32 rd29, r40, 8;
mov.u64 rd30, %12;
add.s64 rd31, rd30, rd29;
ld.global.v2.f32 {f234, f235}, [rd31];
mul.f32 f238, f234, f226;
mul.f32 f239, f235, f232;
sub.f32 f240, f238, f239;
mul.f32 f241, f234, f232;
fma.rn.f32 f242, f235, f226, f241;
mul.f32 f243, f234, f234;
mul.f32 f244, f235, f235;
sub.f32 f245, f243, f244;
mul.f32 f246, f235, f234;
fma.rn.f32 f247, f235, f234, f246;
mul.f32 f248, f245, f227;
mul.f32 f249, f247, f233;
sub.f32 f250, f248, f249;
mul.f32 f251, f245, f233;
fma.rn.f32 f252, f247, f227, f251;
barrier.sync 0;
mad.lo.s32 r45, r40, 2916, r44;
st.shared.f32 [r45], f219;
st.shared.f32 [r45+972], f240;
st.shared.f32 [r45+1944], f250;
barrier.sync 0;
ld.shared.f32 f253, [r11];
ld.shared.f32 f254, [r11+2916];
ld.shared.f32 f255, [r11+5832];
barrier.sync 0;
st.shared.f32 [r45], f221;
st.shared.f32 [r45+972], f242;
st.shared.f32 [r45+1944], f252;
barrier.sync 0;
ld.shared.f32 f256, [r11];
ld.shared.f32 f257, [r11+2916];
ld.shared.f32 f258, [r11+5832];
add.f32 f259, f254, f255;
add.f32 f260, f257, f258;
mul.f32 f261, f259, 0f3F000000;
sub.f32 f262, f253, f261;
sub.f32 f263, f257, f258;
mul.f32 f264, f263, 0f3F5DB3D7;
mul.f32 f265, f260, 0f3F000000;
sub.f32 f266, f256, f265;
sub.f32 f267, f254, f255;
mul.f32 f268, f267, 0f3F5DB3D7;
add.f32 %0, f253, f259;
add.f32 %1, f256, f260;
add.f32 %2, f264, f262;
sub.f32 %3, f266, f268;
sub.f32 %4, f262, f264;
add.f32 %5, f268, f266;
})"
     : "=f"(rmem[0].x), "=f"(rmem[0].y), "=f"(rmem[1].x), "=f"(rmem[1].y), "=f"(rmem[2].x), "=f"(rmem[2].y): "r"(smem), "l"(lut_sp_3_2187), "l"(lut_sp_3_729), "l"(lut_sp_3_243), "l"(lut_sp_3_81), "l"(lut_sp_3_27), "l"(lut_sp_3_9), "f"(rmem[0].x), "f"(rmem[0].y), "f"(rmem[1].x), "f"(rmem[1].y), "f"(rmem[1].y), "f"(rmem[2].x), "f"(rmem[2].y));
};


#endif
